1 /*
  2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/macroAssembler.inline.hpp"
 27 #include "c1/c1_LIRAssembler.hpp"
 28 #include "c1/c1_MacroAssembler.hpp"
 29 #include "gc/g1/c1/g1BarrierSetC1.hpp"
 30 #include "gc/g1/g1BarrierSet.hpp"
 31 #include "gc/g1/g1BarrierSetAssembler.hpp"
 32 #include "gc/g1/g1CardTable.hpp"
 33 #include "gc/g1/g1ThreadLocalData.hpp"
 34 #include "gc/g1/heapRegion.hpp"
 35 #include "interpreter/interp_masm.hpp"
 36 #include "runtime/sharedRuntime.hpp"
 37 #include "utilities/macros.hpp"
 38 
 39 #define __ masm->
 40 
// Generate the G1 SATB pre-barrier for a reference-array copy: while
// concurrent marking is active, call into the runtime so the about-to-be
// overwritten oops in the destination range are recorded.
//   addr  - register holding the destination start address
//   count - register holding the number of elements
// Clobbers O5 (tmp); preserves global addr/count registers across the call.
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is
  // uninitialized - there are no previous values to record in that case.
  if (!dest_uninitialized) {
    Register tmp = O5;
    assert_different_registers(addr, count, tmp);
    Label filtered;
    // Load the per-thread SATB "active" flag (thread in G2); the flag's
    // width depends on the SATBMarkQueue layout.
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    }
    // Is marking active?  If not (flag == 0), skip the barrier entirely.
    __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

    // Open a new register window so the runtime call can't clobber the
    // caller's O-registers.
    __ save_frame(0);
    // Save the necessary global regs... will be used after.
    // (Globals are not renamed by save/restore, so stash them in locals.)
    if (addr->is_global()) {
      __ mov(addr, L0);
    }
    if (count->is_global()) {
      __ mov(count, L1);
    }
    // after_save() maps the caller's O-registers to the corresponding
    // I-registers of the new window.
    __ mov(addr->after_save(), O0);
    // Get the count into O1 (done in the call's delay slot below).
    address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry)
                                         : CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry);
    __ call(slowpath);
    __ delayed()->mov(count->after_save(), O1);
    // Restore the global registers saved above.
    if (addr->is_global()) {
      __ mov(L0, addr);
    }
    if (count->is_global()) {
      __ mov(L1, count);
    }
    __ restore();

    __ bind(filtered);
    DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp
  }
}
 85 
// Generate the G1 post-barrier for a reference-array copy: call the runtime
// entry with the destination start address and element count.
//   addr  - register holding the destination start address
//   count - register holding the number of elements written
//   tmp   - unused here; kept for interface symmetry with other platforms
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  // Get some new fresh output registers.
  __ save_frame(0);
  // after_save() maps the caller's registers into the new window.
  __ mov(addr->after_save(), O0);
  __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry));
  // The count is moved into O1 in the call's delay slot.
  __ delayed()->mov(count->after_save(), O1);
  __ restore();
}
 95 
 96 #undef __
 97 
 98 static address satb_log_enqueue_with_frame = NULL;
 99 static u_char* satb_log_enqueue_with_frame_end = NULL;
100 
101 static address satb_log_enqueue_frameless = NULL;
102 static u_char* satb_log_enqueue_frameless_end = NULL;
103 
104 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
105 
106 static void generate_satb_log_enqueue(bool with_frame) {
107   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
108   CodeBuffer buf(bb);
109   MacroAssembler masm(&buf);
110 
111 #define __ masm.
112 
113   address start = __ pc();
114   Register pre_val;
115 
116   Label refill, restart;
117   if (with_frame) {
118     __ save_frame(0);
119     pre_val = I0;  // Was O0 before the save.
120   } else {
121     pre_val = O0;
122   }
123 
124   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
125   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
126 
127   assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
128          in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
129          "check sizes in assembly below");
130 
131   __ bind(restart);
132 
133   // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
134   // so ld_ptr is appropriate.
135   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
136 
137   // index == 0?
138   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
139 
140   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
141   __ sub(L0, oopSize, L0);
142 
143   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
144   if (!with_frame) {
145     // Use return-from-leaf
146     __ retl();
147     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
148   } else {
149     // Not delayed.
150     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
151   }
152   if (with_frame) {
153     __ ret();
154     __ delayed()->restore();
155   }
156   __ bind(refill);
157 
158   address handle_zero =
159     CAST_FROM_FN_PTR(address,
160                      &SATBMarkQueueSet::handle_zero_index_for_thread);
161   // This should be rare enough that we can afford to save all the
162   // scratch registers that the calling context might be using.
163   __ mov(G1_scratch, L0);
164   __ mov(G3_scratch, L1);
165   __ mov(G4, L2);
166   // We need the value of O0 above (for the write into the buffer), so we
167   // save and restore it.
168   __ mov(O0, L3);
169   // Since the call will overwrite O7, we save and restore that, as well.
170   __ mov(O7, L4);
171   __ call_VM_leaf(L5, handle_zero, G2_thread);
172   __ mov(L0, G1_scratch);
173   __ mov(L1, G3_scratch);
174   __ mov(L2, G4);
175   __ mov(L3, O0);
176   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
177   __ delayed()->mov(L4, O7);
178 
179   if (with_frame) {
180     satb_log_enqueue_with_frame = start;
181     satb_log_enqueue_with_frame_end = __ pc();
182   } else {
183     satb_log_enqueue_frameless = start;
184     satb_log_enqueue_frameless_end = __ pc();
185   }
186 
187 #undef __
188 }
189 
190 #define __ masm->
191 
// Generate the G1 SATB pre-barrier for a single oop store: if marking is
// active and the previous value is non-null, enqueue the previous value via
// one of the satb_log_enqueue stubs.
//   obj/index/offset - location of the field; when obj != noreg the previous
//                      value is loaded here into tmp
//   pre_val          - previously-loaded value (only when obj == noreg)
//   tmp              - scratch register; receives the loaded previous value
//   preserve_o_regs  - if true, the caller's O-registers must survive, so
//                      the frameless stub is called under a fresh window
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register index,
                                                 int offset,
                                                 Register pre_val,
                                                 Register tmp,
                                                 bool preserve_o_regs) {
  Label filtered;

  if (obj == noreg) {
    // We are not loading the previous value so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp);
  } else {
    // We will be loading the previous value
    // in this code so...
    assert(offset == 0 || index == noreg, "choose one");
    assert(pre_val == noreg, "check this code");
  }

  // Load the per-thread SATB "active" flag (thread in G2); the flag's
  // width depends on the SATBMarkQueue layout.
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  }

  // Is marking active?  If not (flag == 0), skip the barrier entirely.
  __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

  // Do we need to load the previous value?
  if (obj != noreg) {
    // Load the previous value...
    if (index == noreg) {
      if (Assembler::is_simm13(offset)) {
        // Offset fits in a 13-bit immediate.
        __ load_heap_oop(obj, offset, tmp);
      } else {
        // Offset too large for an immediate: materialize it first.
        __ set(offset, tmp);
        __ load_heap_oop(obj, tmp, tmp);
      }
    } else {
      __ load_heap_oop(obj, index, tmp);
    }
    // Previous value has been loaded into tmp
    pre_val = tmp;
  }

  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?  Null previous values need no recording.
  __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);

  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  // case, pre_val will be a scratch G-reg, but there are some cases in
  // which it's an O-reg.  In the first case, do a normal call.  In the
  // latter, do a save here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");

  if (pre_val->is_global() && !preserve_o_regs) {
    // The with-frame stub does its own save; pass pre_val in O0 via the
    // call's delay slot.
    __ call(satb_log_enqueue_with_frame);
    __ delayed()->mov(pre_val, O0);
  } else {
    // Open a window here so the caller's O-regs survive, then call the
    // frameless (leaf) stub.
    __ save_frame(0);
    __ call(satb_log_enqueue_frameless);
    __ delayed()->mov(pre_val->after_save(), O0);
    __ restore();
  }

  __ bind(filtered);
}
266 
267 #undef __
268 
269 static address dirty_card_log_enqueue = 0;
270 static u_char* dirty_card_log_enqueue_end = 0;
271 
272 // This gets to assume that o0 contains the object address.
273 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
274   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
275   CodeBuffer buf(bb);
276   MacroAssembler masm(&buf);
277 #define __ masm.
278   address start = __ pc();
279 
280   Label not_already_dirty, restart, refill, young_card;
281 
282   __ srlx(O0, CardTable::card_shift, O0);
283   AddressLiteral addrlit(byte_map_base);
284   __ set(addrlit, O1); // O1 := <card table base>
285   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
286 
287   __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
288 
289   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
290   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
291 
292   assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
293   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
294 
295   __ bind(young_card);
296   // We didn't take the branch, so we're already dirty: return.
297   // Use return-from-leaf
298   __ retl();
299   __ delayed()->nop();
300 
301   // Not dirty.
302   __ bind(not_already_dirty);
303 
304   // Get O0 + O1 into a reg by itself
305   __ add(O0, O1, O3);
306 
307   // First, dirty it.
308   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
309 
310   int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
311   int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
312   __ bind(restart);
313 
314   // Load the index into the update buffer. DirtyCardQueue::_index is
315   // a size_t so ld_ptr is appropriate here.
316   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
317 
318   // index == 0?
319   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
320 
321   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
322   __ sub(L0, oopSize, L0);
323 
324   __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
325   // Use return-from-leaf
326   __ retl();
327   __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
328 
329   __ bind(refill);
330   address handle_zero =
331     CAST_FROM_FN_PTR(address,
332                      &DirtyCardQueueSet::handle_zero_index_for_thread);
333   // This should be rare enough that we can afford to save all the
334   // scratch registers that the calling context might be using.
335   __ mov(G1_scratch, L3);
336   __ mov(G3_scratch, L5);
337   // We need the value of O3 above (for the write into the buffer), so we
338   // save and restore it.
339   __ mov(O3, L6);
340   // Since the call will overwrite O7, we save and restore that, as well.
341   __ mov(O7, L4);
342 
343   __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
344   __ mov(L3, G1_scratch);
345   __ mov(L5, G3_scratch);
346   __ mov(L6, O3);
347   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
348   __ delayed()->mov(L4, O7);
349 
350   dirty_card_log_enqueue = start;
351   dirty_card_log_enqueue_end = __ pc();
352   // XXX Should have a guarantee here about not going off the end!
353   // Does it already do so?  Do an experiment...
354 
355 #undef __
356 
357 }
358 
359 #define __ masm->
360 
// Generate the G1 post-barrier for a single oop store: unless the store is
// filtered out as intra-region, call the dirty_card_log_enqueue stub with
// the store address in O0.
//   store_addr - address the oop was stored to
//   new_val    - the stored oop; a G0 (null) store needs no barrier
//   tmp        - scratch register
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
  Label filtered;
  MacroAssembler* post_filter_masm = masm;

  // Storing null can never create a cross-region reference.
  if (new_val == G0) return;

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  if (G1RSBarrierRegionFilter) {
    // Filter stores where source and destination lie in the same heap
    // region: xor the addresses and shift away the intra-region bits.
    __ xor3(store_addr, new_val, tmp);
    __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);

    __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
  }

  // If the "store_addr" register is an "in" or "local" register, move it to
  // a scratch reg so we can pass it as an argument.
  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  // Pick a scratch register different from "tmp".
  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
  // Make sure we use up the delay slot!
  if (use_scr) {
    post_filter_masm->mov(store_addr, scr);
  } else {
    post_filter_masm->nop();
  }
  __ save_frame(0);
  __ call(dirty_card_log_enqueue);
  // The store address is moved into O0 in the call's delay slot.
  if (use_scr) {
    __ delayed()->mov(scr, O0);
  } else {
    __ delayed()->mov(store_addr->after_save(), O0);
  }
  __ restore();

  __ bind(filtered);
}
398 
// Emit an oop store to dst with the G1 pre- and post-barriers as dictated
// by the decorators: a pre-barrier for heap/concurrent-root stores and a
// post-barrier for non-null heap stores (card address precise for array /
// unknown-reference stores).
//   val - register holding the value to store (G0 means storing null)
//   dst - destination address (base, optional index or displacement)
//   tmp - scratch register; also receives the uncompressed new value when
//         compressed oops are in use
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register val, Address dst, Register tmp) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  // No need for post barrier if storing NULL
  bool needs_post_barrier = val != G0 && in_heap;

  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  // "Precise" means the post-barrier must see the exact element address,
  // not just the object base.
  bool precise = on_array || on_anonymous;

  Register index = dst.has_index() ? dst.index() : noreg;
  int disp = dst.has_disp() ? dst.disp() : 0;

  if (needs_pre_barrier) {
    // Load and record the previous value.
    g1_write_barrier_pre(masm, dst.base(), index, disp,
                         noreg /* pre_val */,
                         tmp, true /*preserve_o_regs*/);
  }

  Register new_val = val;
  if (needs_post_barrier) {
    // G1 barrier needs uncompressed oop for region cross check.
    if (UseCompressedOops && val != G0) {
      // Copy into tmp before store_at may compress val in place.
      new_val = tmp;
      __ mov(val, new_val);
    }
  }

  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);

  if (needs_post_barrier) {
    Register base = dst.base();
    if (precise) {
      // Form the exact field/element address in base (clobbers base).
      if (!dst.has_index()) {
        __ add(base, disp, base);
      } else {
        assert(!dst.has_disp(), "not supported yet");
        __ add(base, index, base);
      }
    }
    g1_write_barrier_post(masm, base, new_val, tmp);
  }
}
446 
// Emit an oop load from src into dst; for Reference.get-style loads of weak
// or phantom referents, additionally run the SATB pre-barrier on the loaded
// value so it is kept alive by concurrent marking.
//   src - source address
//   dst - destination register, also used as the pre-barrier's pre_val
//   tmp - scratch register for the pre-barrier
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address src, Register dst, Register tmp) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer. Note with
    // these parameters the pre-barrier does not generate
    // the load of the previous value

    Register pre_val = dst;
    bool saved = false;
    if (pre_val->is_in()) {
      // The g1_write_barrier_pre method assumes that the pre_val
      // is not in an input register.  Open a window and move it to O0.
      __ save_frame_and_mov(0, pre_val, O0);
      pre_val = O0;
      saved = true;
    }

    g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
                         pre_val /* pre_val */,
                         tmp /* tmp */,
                         true /* preserve_o_regs */);

    if (saved) {
      // Close the window opened above.
      __ restore();
    }
  }
}
481 
482 void G1BarrierSetAssembler::barrier_stubs_init() {
483   if (dirty_card_log_enqueue == 0) {
484     G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
485     CardTable *ct = bs->card_table();
486     generate_dirty_card_log_enqueue(ct->byte_map_base());
487     assert(dirty_card_log_enqueue != 0, "postcondition.");
488   }
489   if (satb_log_enqueue_with_frame == 0) {
490     generate_satb_log_enqueue(true);
491     assert(satb_log_enqueue_with_frame != 0, "postcondition.");
492   }
493   if (satb_log_enqueue_frameless == 0) {
494     generate_satb_log_enqueue(false);
495     assert(satb_log_enqueue_frameless != 0, "postcondition.");
496   }
497 }
498 
499 #ifdef COMPILER1
500 
501 #undef __
502 #define __ ce->masm()->
503 
// C1 slow-path stub for the G1 pre-barrier: optionally load the previous
// value, skip the runtime call if it is null, otherwise tail into the
// shared pre-barrier runtime code blob with pre_val in G4.
void G1BarrierSetAssembler::gen_g1_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // Null previous value: nothing to record, branch back to the
  // continuation. Choose the branch form based on reachability of the
  // continuation within a 16-bit displacement.
  if (__ is_in_wdisp16_range(*stub->continuation())) {
    __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
  } else {
    __ cmp(pre_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
  }
  __ delayed()->nop();

  // Non-null: call the shared runtime blob; pre_val is passed in G4 via
  // the call's delay slot.
  __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(pre_val_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
  __ delayed()->nop();
}
533 
// C1 slow-path stub for the G1 post-barrier: skip the runtime call if the
// stored value is null, otherwise call the shared post-barrier runtime
// code blob with the store address in G4.
void G1BarrierSetAssembler::gen_g1_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  // Null new value: no cross-region reference was created, branch back.
  // Choose the branch form based on reachability of the continuation
  // within a 16-bit displacement.
  if (__ is_in_wdisp16_range(*stub->continuation())) {
    __ br_null(new_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
  } else {
    __ cmp(new_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
  }
  __ delayed()->nop();

  // Non-null: call the shared runtime blob; the store address is passed
  // in G4 via the call's delay slot.
  __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(addr_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
  __ delayed()->nop();
}
556 
557 #undef __
558 #define __ sasm->
559 
// Generate the shared C1 runtime stub for the G1 pre-barrier: enqueue the
// previous value (passed in G4) into the thread's SATB queue, calling into
// the VM to refill the queue when its index reaches zero.
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // G4: previous value of memory

  Register pre_val = G4;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;

  Label refill, restart;
  int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // Is marking still active?  It may have terminated between the inline
  // check and this stub; if so, return without enqueueing anything.
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2_thread, satb_q_active_byte_offset, tmp);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2_thread, satb_q_active_byte_offset, tmp);
  }
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart);
  __ retl();
  __ delayed()->nop();

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t so ld_ptr is appropriate
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);

  // index == 0 means the queue is full - go refill it.
  __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
  __ sub(tmp, oopSize, tmp);

  __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := pre_val
  // Use return-from-leaf; store the decremented index in the delay slot.
  __ retl();
  __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);

  __ bind(refill);

  // Refill path: preserve all live registers around the VM call.
  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   SATBMarkQueueSet::handle_zero_index_for_thread),
                                   G2_thread);

  __ restore_live_registers(true);

  // Retry the enqueue now that the queue has been refilled.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
615 
// Generate the shared C1 runtime stub for the G1 post-barrier: given a
// store address in G4, dirty the corresponding card (unless young or
// already dirty) and enqueue the card address into the thread's dirty-card
// queue, calling into the VM to refill the queue when it is full.
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  Register addr = G4;
  Register cardtable = G5;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;
  jbyte* byte_map_base = bs->card_table()->byte_map_base();

  Label not_already_dirty, restart, refill, young_card;

  // addr := card index for the store address.
#ifdef _LP64
  __ srlx(addr, CardTable::card_shift, addr);
#else
  __ srl(addr, CardTable::card_shift, addr);
#endif

  AddressLiteral rs((address)byte_map_base);
  __ set(rs, cardtable);         // cardtable := <card table base>
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  // Young cards never need enqueueing.
  __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad fence, then re-read the card to avoid a race with concurrent
  // refinement.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get cardtable + tmp into a reg by itself
  __ add(addr, cardtable, tmp2);

  // First, dirty it.
  __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).

  Register tmp3 = cardtable;
  Register tmp4 = tmp;

  // these registers are now dead
  addr = cardtable = tmp = noreg;

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());

  __ bind(restart);

  // Get the index into the update buffer. DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);

  // index == 0 means the queue is full - go refill it.
  __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
  __ sub(tmp3, oopSize, tmp3);

  __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
  // Use return-from-leaf; store the decremented index in the delay slot.
  __ retl();
  __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);

  // Refill path: preserve all live registers around the VM call.
  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   DirtyCardQueueSet::handle_zero_index_for_thread),
                                   G2_thread);

  __ restore_live_registers(true);

  // Retry the enqueue now that the queue has been refilled.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
702 
703 #undef __
704 
705 #endif // COMPILER1