/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

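// The '__' shorthand routes the pseudo-assembly below through the current
// assembler; it is redefined for the C1 stub sections further down.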
#define __ masm->

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;

  if (!dest_uninitialized) {
    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
#ifndef _LP64
    __ push(thread);
    __ get_thread(thread);
#endif

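    // The SATB (snapshot-at-the-beginning) pre-barrier is needed only while
    // concurrent marking is active; the per-thread queue-active flag tracks
    // exactly that, so the whole slow path can be skipped when it reads zero.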
    Label filtered;
    Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ cmpl(in_progress, 0);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ cmpb(in_progress, 0);
    }

    NOT_LP64(__ pop(thread);)

    __ jcc(Assembler::equal, filtered);

    __ pusha();                      // push registers
#ifdef _LP64
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // exactly backwards!!
        __ xchgptr(c_rarg1, c_rarg0);
      } else {
        __ movptr(c_rarg1, count);
        __ movptr(c_rarg0, addr);
      }
    } else {
      __ movptr(c_rarg0, addr);
      __ movptr(c_rarg1, count);
    }
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry), 2);
    }
#else
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry),
                    addr, count);
#endif
    __ popa();

    __ bind(filtered);
  }
}

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  __ pusha();             // push registers (overkill)
#ifdef _LP64
  if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
    assert_different_registers(c_rarg1, addr);
    __ mov(c_rarg1, count);
    __ mov(c_rarg0, addr);
  } else {
    assert_different_registers(c_rarg0, count);
    __ mov(c_rarg0, addr);
    __ mov(c_rarg1, count);
  }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry), 2);
#else
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry),
                  addr, count);
#endif
  __ popa();
}

void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop && on_reference) {
    const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    NOT_LP64(__ get_thread(thread));

    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
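    // Logging the referent keeps it live from the SATB marker's point of
    // view: once a weak or phantom referent has been handed to the mutator
    // (e.g. via Reference.get()), marking must treat it as reachable.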
    g1_write_barrier_pre(masm /* masm */,
                         noreg /* obj */,
                         dst /* pre_val */,
                         thread /* thread */,
                         tmp1 /* tmp */,
                         true /* tosca_live */,
                         true /* expand_call */);
  }
}

void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ cmpl(in_progress, 0);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(in_progress, 0);
  }
  __ jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

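  // In C-like terms the fast path below is:
  //   if (index == 0) goto runtime;   // SATB buffer is full
  //   index -= wordSize;              // the index counts down in bytes
  //   *(buffer + index) = pre_val;    // log the previous value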
  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    if (c_rarg0 != pre_val) {
      __ mov(c_rarg0, pre_val);
    }
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp,
                                                  Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  CardTableBarrierSet* ct =
    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does the store cross heap regions?

  __ movptr(tmp, store_addr);
  __ xorptr(tmp, new_val);
  __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  __ jcc(Assembler::equal, done);
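  // The XOR/shift sequence above yields zero exactly when store_addr and
  // new_val agree in all address bits at or above LogOfHRGrainBytes, i.e.
  // when the field and the new value lie in the same heap region; such
  // same-region pointers need no remembered-set entry.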

  // crosses regions, storing NULL?

  __ cmpptr(new_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  __ movptr(card_addr, store_addr);
  __ shrptr(card_addr, CardTable::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
  __ addptr(card_addr, cardtable);
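  // card_addr now holds byte_map_base + (store_addr >> card_shift), the
  // address of the card table byte covering the updated field.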

  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
  __ jcc(Assembler::equal, done);

  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
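  // The StoreLoad fence orders the preceding oop store against the reload
  // of the card below: if the card still reads dirty after the fence, a
  // concurrent refinement thread that later cleans and rescans it is
  // guaranteed to observe the just-stored oop.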
  __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
  __ jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());

  __ cmpl(queue_index, 0);
  __ jcc(Assembler::equal, runtime);
  __ subl(queue_index, wordSize);
  __ movptr(tmp2, buffer);
#ifdef _LP64
  __ movslq(rscratch1, queue_index);
  __ addq(tmp2, rscratch1);
  __ movq(Address(tmp2, 0), card_addr);
#else
  __ addl(tmp2, queue_index);
  __ movl(Address(tmp2, 0), card_addr);
#endif
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  __ push(store_addr);
  __ push(new_val);
#ifdef _LP64
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  __ push(thread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  __ pop(thread);
#endif
  __ pop(new_val);
  __ pop(store_addr);

  __ bind(done);
}

void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  bool needs_post_barrier = val != noreg && in_heap;

  Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
  Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
  // Flatten the object address if needed.
  // We do it unconditionally, not just for precise barriers, because the
  // barrier code below needs the destination address in a register.
  if (dst.index() == noreg && dst.disp() == 0) {
    if (dst.base() != tmp1) {
      __ movptr(tmp1, dst.base());
    }
  } else {
    __ lea(tmp1, dst);
  }
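  // From here on tmp1 holds the flattened destination address: the
  // pre-barrier reloads the previous value through it and the post-barrier
  // derives the card address from it.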

#ifndef _LP64
  InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
#endif

  NOT_LP64(__ get_thread(rcx));
  NOT_LP64(imasm->save_bcp());

  if (needs_pre_barrier) {
    g1_write_barrier_pre(masm /*masm*/,
                         tmp1 /* obj */,
                         tmp2 /* pre_val */,
                         rthread /* thread */,
                         tmp3  /* tmp */,
                         val != noreg /* tosca_live */,
                         false /* expand_call */);
  }
  if (val == noreg) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
  } else {
    Register new_val = val;
    if (needs_post_barrier) {
      // G1 barrier needs uncompressed oop for region cross check.
      if (UseCompressedOops) {
        new_val = tmp2;
        __ movptr(new_val, val);
      }
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    if (needs_post_barrier) {
      g1_write_barrier_post(masm /*masm*/,
                            tmp1 /* store_adr */,
                            new_val /* new_val */,
                            rthread /* thread */,
                            tmp3 /* tmp */,
                            tmp2 /* tmp2 */);
    }
  }
  NOT_LP64(imasm->restore_bcp());
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());
}

void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());
  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register new_val_reg = stub->new_val()->as_register();
  __ cmpptr(new_val_reg, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->addr()->as_pointer_register(), 0);
  __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ cmpl(queue_active, 0);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(queue_active, 0);
  }
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  // arg0: store_address
  Address store_addr(rbp, 2*BytesPerWord);
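  // rbp + 2*BytesPerWord skips the saved rbp and the return address, i.e.
  // it addresses the first stack argument passed to this stub.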

  CardTableBarrierSet* ct =
    barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  Label done;
  Label enqueued;
  Label runtime;

  // At this point we know new_value is non-NULL and that the store crosses
  // regions. We must check whether the card is already dirty.

  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);

  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  __ push(rax);
  __ push(rcx);

  const Register cardtable = rax;
  const Register card_addr = rcx;

  __ load_parameter(0, card_addr);
  __ shrptr(card_addr, CardTable::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
  __ addptr(card_addr, cardtable);

  NOT_LP64(__ get_thread(thread);)

  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
  __ jcc(Assembler::equal, done);

  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ cmpb(Address(card_addr, 0), (int)CardTable::dirty_card_val());
  __ jcc(Assembler::equal, done);

  // storing region crossing non-NULL, card is clean.
  // dirty card and log.

  __ movb(Address(card_addr, 0), (int)CardTable::dirty_card_val());

  const Register tmp = rdx;
  __ push(rdx);

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);
  __ movptr(Address(tmp, 0), card_addr);
  __ jmp(enqueued);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);

  __ restore_live_registers(true);

  __ bind(enqueued);
  __ pop(rdx);

  __ bind(done);
  __ pop(rcx);
  __ pop(rax);

  __ epilogue();
}

#undef __

#endif // COMPILER1