1 /*
  2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/macroAssembler.inline.hpp"
 27 #include "c1/c1_LIRAssembler.hpp"
 28 #include "c1/c1_MacroAssembler.hpp"
 29 #include "gc/g1/c1/g1BarrierSetC1.hpp"
 30 #include "gc/g1/g1BarrierSet.hpp"
 31 #include "gc/g1/g1BarrierSetAssembler.hpp"
 32 #include "gc/g1/g1CardTable.hpp"
 33 #include "gc/g1/g1ThreadLocalData.hpp"
 34 #include "gc/g1/heapRegion.hpp"
 35 #include "interpreter/interp_masm.hpp"
 36 #include "runtime/sharedRuntime.hpp"
 37 #include "utilities/macros.hpp"
 38 
 39 #define __ masm->
 40 
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  // Arraycopy SATB pre-barrier: log the old contents of the destination
  // range [addr, addr + count) before they are overwritten, so concurrent
  // marking can still see them.  Skipped entirely when the destination is
  // known to be uninitialized (there are no old values to log).
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;

  if (!dest_uninitialized) {
    // On 32-bit there is no dedicated thread register; borrow rax and
    // save/restore it around the thread lookup.
    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
#ifndef _LP64
    __ push(thread);
    __ get_thread(thread);
#endif

    Label filtered;
    Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
    // Is marking active?  The active flag's width is a queue implementation
    // detail, so pick the matching compare width.
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ cmpl(in_progress, 0);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ cmpb(in_progress, 0);
    }

    NOT_LP64(__ pop(thread);)  // restore rax; flags from the compare survive pop

    __ jcc(Assembler::equal, filtered);  // marking inactive -> no logging needed

    __ pusha();                      // push registers
#ifdef _LP64
    // Marshal (addr, count) into the first two C argument registers,
    // taking care not to clobber an input that already aliases one of them.
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // exactly backwards!!
        __ xchgptr(c_rarg1, c_rarg0);
      } else {
        __ movptr(c_rarg1, count);
        __ movptr(c_rarg0, addr);
      }
    } else {
      __ movptr(c_rarg0, addr);
      __ movptr(c_rarg1, count);
    }
    // The runtime entry must match the in-heap oop layout (narrow vs wide).
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry), 2);
    }
#else
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry),
                    addr, count);
#endif
    __ popa();

    __ bind(filtered);
  }
}
 94 
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  // Arraycopy post-barrier: hand the written range [addr, addr + count)
  // to the runtime, which dirties the covering cards.  All registers are
  // saved around the call (overkill, but simple and always correct).
  __ pusha();             // push registers (overkill)
#ifdef _LP64
  // Marshal (addr, count) into c_rarg0/c_rarg1 without clobbering an input
  // that aliases an argument register.
  if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
    assert_different_registers(c_rarg1, addr);
    __ mov(c_rarg1, count);
    __ mov(c_rarg0, addr);
  } else {
    assert_different_registers(c_rarg0, count);
    __ mov(c_rarg0, addr);
    __ mov(c_rarg1, count);
  }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry), 2);
#else
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry),
                  addr, count);
#endif
  __ popa();
}
115 
116 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
117                                     Register dst, Address src, Register tmp1, Register tmp_thread) {
118   bool on_oop = type == T_OBJECT || type == T_ARRAY;
119   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
120   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
121   bool on_reference = on_weak || on_phantom;
122   ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
123   if (on_oop && on_reference) {
124     const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
125     NOT_LP64(__ get_thread(thread));
126 
127     // Generate the G1 pre-barrier code to log the value of
128     // the referent field in an SATB buffer.
129     g1_write_barrier_pre(masm /* masm */,
130                          noreg /* obj */,
131                          dst /* pre_val */,
132                          thread /* thread */,
133                          tmp1 /* tmp */,
134                          true /* tosca_live */,
135                          true /* expand_call */);
136   }
137 }
138 
// Emit the G1 SATB pre-barrier for a single oop store.
//   obj        - address of the field being overwritten (noreg if the old
//                value is already in pre_val)
//   pre_val    - register that receives (or already holds) the old value
//   thread     - current thread (must be r15_thread on 64-bit)
//   tmp        - scratch register
//   tosca_live - rax holds a live value and must be preserved on the slow path
//   expand_call- expand call_VM_leaf inline to skip the last_sp check (needed
//                when there is no full interpreter frame, e.g. Reference.get)
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  // Per-thread SATB queue fields.
  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?  If not, the barrier is a no-op.
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ cmpl(in_progress, 0);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(in_progress, 0);
  }
  __ jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?  NULL never needs to be logged.
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  // The queue fills downward: decrement the index, then store at
  // buffer + new_index.
  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values (call-clobbered registers the caller needs)
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    // Place args manually; order matters: write c_rarg1 (thread) first so
    // the pre_val != c_rarg1 assert above guarantees no clobbering.
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    if (c_rarg0 != pre_val) {
      __ mov(c_rarg0, pre_val);
    }
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values (reverse order of the pushes above)
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}
257 
// Emit the G1 post-barrier for a single oop store: if the store creates a
// region-crossing reference to a non-NULL object and the covering card is
// not already dirty/young, dirty it and enqueue it on the thread's dirty
// card queue (calling the runtime when the queue is full).
//   store_addr - address stored into; new_val - value stored;
//   thread     - current thread (r15_thread on 64-bit);
//   tmp, tmp2  - scratch registers.
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp,
                                                  Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  // Per-thread dirty card queue fields.
  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  CardTableBarrierSet* ct =
    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?  Same-region stores never need a
  // remembered-set entry; xor + shift leaves zero iff both addresses fall
  // in the same region.

  __ movptr(tmp, store_addr);
  __ xorptr(tmp, new_val);
  __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  __ jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  __ cmpptr(new_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  // NOTE: card_addr aliases tmp from here on (store_addr/new_val no longer
  // needed for the checks).
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  __ movptr(card_addr, store_addr);
  __ shrptr(card_addr, CardTable::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
  __ addptr(card_addr, cardtable);

  // Young-gen cards never need refinement.
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
  __ jcc(Assembler::equal, done);

  // StoreLoad fence before re-reading the card, then filter cards some
  // other thread already dirtied.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
  __ jcc(Assembler::equal, done);


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());

  // Queue fills downward; index == 0 means full -> call runtime.
  __ cmpl(queue_index, 0);
  __ jcc(Assembler::equal, runtime);
  __ subl(queue_index, wordSize);
  __ movptr(tmp2, buffer);   // tmp2 (== cardtable) reused; cardtable is dead now
#ifdef _LP64
  __ movslq(rscratch1, queue_index);
  __ addq(tmp2, rscratch1);
  __ movq(Address(tmp2, 0), card_addr);
#else
  __ addl(tmp2, queue_index);
  __ movl(Address(tmp2, 0), card_addr);
#endif
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  __ push(store_addr);
  __ push(new_val);
#ifdef _LP64
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  __ push(thread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  __ pop(thread);
#endif
  __ pop(new_val);
  __ pop(store_addr);

  __ bind(done);
}
345 
// Emit a G1 oop store with pre- and post-barriers.
//   dst  - destination field address; val - value to store (noreg => store
//          NULL); tmp1/tmp2 - scratch registers.
// Note the double duty of tmp2: it receives the old value during the
// pre-barrier, and is later reused to hold the uncompressed new value for
// the post-barrier's region-cross check — safe because the pre-barrier has
// fully completed before tmp2 is overwritten.
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  // Storing NULL never needs a post barrier (no new reference is created).
  bool needs_post_barrier = val != noreg && in_heap;

  Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
  Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
  // flatten object address if needed
  // We do it regardless of precise because we need the registers
  if (dst.index() == noreg && dst.disp() == 0) {
    if (dst.base() != tmp1) {
      __ movptr(tmp1, dst.base());
    }
  } else {
    __ lea(tmp1, dst);
  }

#ifndef _LP64
  // On 32-bit this is emitted from the interpreter; we clobber rsi (bcp),
  // so save/restore it via the interpreter assembler.
  InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
#endif

  NOT_LP64(__ get_thread(rcx));
  NOT_LP64(imasm->save_bcp());

  if (needs_pre_barrier) {
    g1_write_barrier_pre(masm /*masm*/,
                         tmp1 /* obj */,
                         tmp2 /* pre_val */,
                         rthread /* thread */,
                         tmp3  /* tmp */,
                         val != noreg /* tosca_live */,
                         false /* expand_call */);
  }
  if (val == noreg) {
    // Store NULL through the flattened address; no post barrier needed.
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
  } else {
    Register new_val = val;
    if (needs_post_barrier) {
      // G1 barrier needs uncompressed oop for region cross check.
      if (UseCompressedOops) {
        new_val = tmp2;
        __ movptr(new_val, val);
      }
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    if (needs_post_barrier) {
      g1_write_barrier_post(masm /*masm*/,
                            tmp1 /* store_adr */,
                            new_val /* new_val */,
                            rthread /* thread */,
                            tmp3 /* tmp */,
                            tmp2 /* tmp2 */);
    }
  }
  NOT_LP64(imasm->restore_bcp());
}
405 
406 #undef __
407 #define __ ce->masm()->
408 
// C1 slow-path stub for the pre-barrier: loads the old value (if requested),
// filters NULL, and otherwise calls the shared pre-barrier runtime blob with
// the old value as a stub parameter.
void G1BarrierSetAssembler::gen_g1_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // A NULL old value never needs to be logged.
  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
432 
433 void G1BarrierSetAssembler::gen_g1_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
434   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
435   __ bind(*stub->entry());
436   assert(stub->addr()->is_register(), "Precondition.");
437   assert(stub->new_val()->is_register(), "Precondition.");
438   Register new_val_reg = stub->new_val()->as_register();
439   __ cmpptr(new_val_reg, (int32_t) NULL_WORD);
440   __ jcc(Assembler::equal, *stub->continuation());
441   ce->store_parameter(stub->addr()->as_pointer_register(), 0);
442   __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
443   __ jmp(*stub->continuation());
444 }
445 
446 #undef __
447 
448 #define __ sasm->
449 
// Shared C1 runtime stub backing gen_g1_pre_barrier_stub: enqueue the
// passed-in old value (stub arg 0) on the thread's SATB queue, calling the
// VM when the queue is full or marking has since been turned off.
// Note: on 32-bit, `thread` and `pre_val` both alias rax; this is safe
// because the thread-relative addresses below are all used before
// load_parameter overwrites rax on the fast path.
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?  (It may have stopped since the inline
  // barrier was emitted.)
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ cmpl(queue_active, 0);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(queue_active, 0);
  }
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // The queue fills downward: index == 0 means full.

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(3, true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}
510 
// Shared C1 runtime stub backing gen_g1_post_barrier_stub: compute the card
// for the passed-in store address (stub arg 0); if the card is neither
// young nor already dirty, dirty it and enqueue it on the thread's dirty
// card queue, calling the VM when the queue is full.
// Note: on 32-bit, `thread` and `cardtable` both alias rax; get_thread is
// deliberately emitted only after cardtable's last use (the addptr above it).
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  // arg0: store_address
  Address store_addr(rbp, 2*BytesPerWord);

  CardTableBarrierSet* ct =
    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  Label done;
  Label enqueued;
  Label runtime;

  // At this point we know new_value is non-NULL and the new_value crosses regions.
  // Must check to see if card is already dirty

  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);

  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  __ push(rax);
  __ push(rcx);

  const Register cardtable = rax;
  const Register card_addr = rcx;

  // card_addr := byte_map_base + (store_addr >> card_shift)
  __ load_parameter(0, card_addr);
  __ shrptr(card_addr, CardTable::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
  __ addptr(card_addr, cardtable);

  NOT_LP64(__ get_thread(thread);)  // clobbers rax; cardtable is dead here

  // Young-gen cards never need refinement.
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
  __ jcc(Assembler::equal, done);

  // StoreLoad fence, then filter cards another thread already dirtied.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ cmpb(Address(card_addr, 0), (int)CardTable::dirty_card_val());
  __ jcc(Assembler::equal, done);

  // storing region crossing non-NULL, card is clean.
  // dirty card and log.

  __ movb(Address(card_addr, 0), (int)CardTable::dirty_card_val());

  const Register tmp = rdx;
  __ push(rdx);

  // Queue fills downward: index == 0 means full -> call runtime.
  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);
  __ movptr(Address(tmp, 0), card_addr);
  __ jmp(enqueued);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(3, true);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);

  __ restore_live_registers(true);

  __ bind(enqueued);
  __ pop(rdx);

  __ bind(done);
  __ pop(rcx);
  __ pop(rax);

  __ epilogue();
}
589 
590 #undef __