1 /*
  2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/macroAssembler.inline.hpp"
 27 #include "gc/g1/g1BarrierSet.hpp"
 28 #include "gc/g1/g1BarrierSetAssembler.hpp"
 29 #include "gc/g1/g1BarrierSetRuntime.hpp"
 30 #include "gc/g1/g1CardTable.hpp"
 31 #include "gc/g1/g1ThreadLocalData.hpp"
 32 #include "gc/g1/heapRegion.hpp"
 33 #include "interpreter/interp_masm.hpp"
 34 #include "runtime/sharedRuntime.hpp"
 35 #include "utilities/macros.hpp"
 36 #ifdef COMPILER1
 37 #include "c1/c1_LIRAssembler.hpp"
 38 #include "c1/c1_MacroAssembler.hpp"
 39 #include "gc/g1/c1/g1BarrierSetC1.hpp"
 40 #endif
 41 
 42 #define __ masm->
 43 
 44 void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
 45                                                             Register addr, Register count) {
 46   bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
 47   // With G1, don't generate the call if we statically know that the target in uninitialized
 48   if (!dest_uninitialized) {
 49     Register tmp = O5;
 50     assert_different_registers(addr, count, tmp);
 51     Label filtered;
 52     // Is marking active?
 53     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
 54       __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
 55     } else {
 56       guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
 57       __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
 58     }
 59     // Is marking active?
 60     __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 61 
 62     __ save_frame(0);
 63     // Save the necessary global regs... will be used after.
 64     if (addr->is_global()) {
 65       __ mov(addr, L0);
 66     }
 67     if (count->is_global()) {
 68       __ mov(count, L1);
 69     }
 70     __ mov(addr->after_save(), O0);
 71     // Get the count into O1
 72     address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry)
 73                                          : CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry);
 74     __ call(slowpath);
 75     __ delayed()->mov(count->after_save(), O1);
 76     if (addr->is_global()) {
 77       __ mov(L0, addr);
 78     }
 79     if (count->is_global()) {
 80       __ mov(L1, count);
 81     }
 82     __ restore();
 83 
 84     __ bind(filtered);
 85     DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp
 86   }
 87 }
 88 
 89 void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
 90                                                              Register addr, Register count, Register tmp) {
 91   // Get some new fresh output registers.
 92   __ save_frame(0);
 93   __ mov(addr->after_save(), O0);
 94   __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry));
 95   __ delayed()->mov(count->after_save(), O1);
 96   __ restore();
 97 }
 98 
 99 #undef __
100 
101 static address satb_log_enqueue_with_frame = NULL;
102 static u_char* satb_log_enqueue_with_frame_end = NULL;
103 
104 static address satb_log_enqueue_frameless = NULL;
105 static u_char* satb_log_enqueue_frameless_end = NULL;
106 
107 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
108 
109 static void generate_satb_log_enqueue(bool with_frame) {
110   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
111   CodeBuffer buf(bb);
112   MacroAssembler masm(&buf);
113 
114 #define __ masm.
115 
116   address start = __ pc();
117   Register pre_val;
118 
119   Label refill, restart;
120   if (with_frame) {
121     __ save_frame(0);
122     pre_val = I0;  // Was O0 before the save.
123   } else {
124     pre_val = O0;
125   }
126 
127   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
128   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
129 
130   assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
131          in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
132          "check sizes in assembly below");
133 
134   __ bind(restart);
135 
136   // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
137   // so ld_ptr is appropriate.
138   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
139 
140   // index == 0?
141   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
142 
143   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
144   __ sub(L0, oopSize, L0);
145 
146   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
147   if (!with_frame) {
148     // Use return-from-leaf
149     __ retl();
150     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
151   } else {
152     // Not delayed.
153     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
154   }
155   if (with_frame) {
156     __ ret();
157     __ delayed()->restore();
158   }
159   __ bind(refill);
160 
161   address handle_zero =
162     CAST_FROM_FN_PTR(address,
163                      &SATBMarkQueueSet::handle_zero_index_for_thread);
164   // This should be rare enough that we can afford to save all the
165   // scratch registers that the calling context might be using.
166   __ mov(G1_scratch, L0);
167   __ mov(G3_scratch, L1);
168   __ mov(G4, L2);
169   // We need the value of O0 above (for the write into the buffer), so we
170   // save and restore it.
171   __ mov(O0, L3);
172   // Since the call will overwrite O7, we save and restore that, as well.
173   __ mov(O7, L4);
174   __ call_VM_leaf(L5, handle_zero, G2_thread);
175   __ mov(L0, G1_scratch);
176   __ mov(L1, G3_scratch);
177   __ mov(L2, G4);
178   __ mov(L3, O0);
179   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
180   __ delayed()->mov(L4, O7);
181 
182   if (with_frame) {
183     satb_log_enqueue_with_frame = start;
184     satb_log_enqueue_with_frame_end = __ pc();
185   } else {
186     satb_log_enqueue_frameless = start;
187     satb_log_enqueue_frameless_end = __ pc();
188   }
189 
190 #undef __
191 }
192 
193 #define __ masm->
194 
195 void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
196                                                  Register obj,
197                                                  Register index,
198                                                  int offset,
199                                                  Register pre_val,
200                                                  Register tmp,
201                                                  bool preserve_o_regs) {
202   Label filtered;
203 
204   if (obj == noreg) {
205     // We are not loading the previous value so make
206     // sure that we don't trash the value in pre_val
207     // with the code below.
208     assert_different_registers(pre_val, tmp);
209   } else {
210     // We will be loading the previous value
211     // in this code so...
212     assert(offset == 0 || index == noreg, "choose one");
213     assert(pre_val == noreg, "check this code");
214   }
215 
216   // Is marking active?
217   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
218     __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
219   } else {
220     guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
221     __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
222   }
223 
224   // Is marking active?
225   __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
226 
227   // Do we need to load the previous value?
228   if (obj != noreg) {
229     // Load the previous value...
230     if (index == noreg) {
231       if (Assembler::is_simm13(offset)) {
232         __ load_heap_oop(obj, offset, tmp);
233       } else {
234         __ set(offset, tmp);
235         __ load_heap_oop(obj, tmp, tmp);
236       }
237     } else {
238       __ load_heap_oop(obj, index, tmp);
239     }
240     // Previous value has been loaded into tmp
241     pre_val = tmp;
242   }
243 
244   assert(pre_val != noreg, "must have a real register");
245 
246   // Is the previous value null?
247   __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
248 
249   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
250   // case, pre_val will be a scratch G-reg, but there are some cases in
251   // which it's an O-reg.  In the first case, do a normal call.  In the
252   // latter, do a save here and call the frameless version.
253 
254   guarantee(pre_val->is_global() || pre_val->is_out(),
255             "Or we need to think harder.");
256 
257   if (pre_val->is_global() && !preserve_o_regs) {
258     __ call(satb_log_enqueue_with_frame);
259     __ delayed()->mov(pre_val, O0);
260   } else {
261     __ save_frame(0);
262     __ call(satb_log_enqueue_frameless);
263     __ delayed()->mov(pre_val->after_save(), O0);
264     __ restore();
265   }
266 
267   __ bind(filtered);
268 }
269 
270 #undef __
271 
272 static address dirty_card_log_enqueue = 0;
273 static u_char* dirty_card_log_enqueue_end = 0;
274 
275 // This gets to assume that o0 contains the object address.
276 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
277   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
278   CodeBuffer buf(bb);
279   MacroAssembler masm(&buf);
280 #define __ masm.
281   address start = __ pc();
282 
283   Label not_already_dirty, restart, refill, young_card;
284 
285   __ srlx(O0, CardTable::card_shift, O0);
286   AddressLiteral addrlit(byte_map_base);
287   __ set(addrlit, O1); // O1 := <card table base>
288   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
289 
290   __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
291 
292   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
293   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
294 
295   assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
296   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
297 
298   __ bind(young_card);
299   // We didn't take the branch, so we're already dirty: return.
300   // Use return-from-leaf
301   __ retl();
302   __ delayed()->nop();
303 
304   // Not dirty.
305   __ bind(not_already_dirty);
306 
307   // Get O0 + O1 into a reg by itself
308   __ add(O0, O1, O3);
309 
310   // First, dirty it.
311   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
312 
313   int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
314   int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
315   __ bind(restart);
316 
317   // Load the index into the update buffer. DirtyCardQueue::_index is
318   // a size_t so ld_ptr is appropriate here.
319   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
320 
321   // index == 0?
322   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
323 
324   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
325   __ sub(L0, oopSize, L0);
326 
327   __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
328   // Use return-from-leaf
329   __ retl();
330   __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
331 
332   __ bind(refill);
333   address handle_zero =
334     CAST_FROM_FN_PTR(address,
335                      &DirtyCardQueueSet::handle_zero_index_for_thread);
336   // This should be rare enough that we can afford to save all the
337   // scratch registers that the calling context might be using.
338   __ mov(G1_scratch, L3);
339   __ mov(G3_scratch, L5);
340   // We need the value of O3 above (for the write into the buffer), so we
341   // save and restore it.
342   __ mov(O3, L6);
343   // Since the call will overwrite O7, we save and restore that, as well.
344   __ mov(O7, L4);
345 
346   __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
347   __ mov(L3, G1_scratch);
348   __ mov(L5, G3_scratch);
349   __ mov(L6, O3);
350   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
351   __ delayed()->mov(L4, O7);
352 
353   dirty_card_log_enqueue = start;
354   dirty_card_log_enqueue_end = __ pc();
355   // XXX Should have a guarantee here about not going off the end!
356   // Does it already do so?  Do an experiment...
357 
358 #undef __
359 
360 }
361 
362 #define __ masm->
363 
364 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
365   Label filtered;
366   MacroAssembler* post_filter_masm = masm;
367 
368   if (new_val == G0) return;
369 
370   G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
371 
372   if (G1RSBarrierRegionFilter) {
373     __ xor3(store_addr, new_val, tmp);
374     __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
375 
376     __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
377   }
378 
379   // If the "store_addr" register is an "in" or "local" register, move it to
380   // a scratch reg so we can pass it as an argument.
381   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
382   // Pick a scratch register different from "tmp".
383   Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
384   // Make sure we use up the delay slot!
385   if (use_scr) {
386     post_filter_masm->mov(store_addr, scr);
387   } else {
388     post_filter_masm->nop();
389   }
390   __ save_frame(0);
391   __ call(dirty_card_log_enqueue);
392   if (use_scr) {
393     __ delayed()->mov(scr, O0);
394   } else {
395     __ delayed()->mov(store_addr->after_save(), O0);
396   }
397   __ restore();
398 
399   __ bind(filtered);
400 }
401 
402 void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
403                                          Register val, Address dst, Register tmp) {
404   bool in_heap = (decorators & IN_HEAP) != 0;
405   bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;
406 
407   bool needs_pre_barrier = in_heap || in_concurrent_root;
408   // No need for post barrier if storing NULL
409   bool needs_post_barrier = val != G0 && in_heap;
410 
411   bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
412   bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
413   bool precise = on_array || on_anonymous;
414 
415   Register index = dst.has_index() ? dst.index() : noreg;
416   int disp = dst.has_disp() ? dst.disp() : 0;
417 
418   if (needs_pre_barrier) {
419     // Load and record the previous value.
420     g1_write_barrier_pre(masm, dst.base(), index, disp,
421                          noreg /* pre_val */,
422                          tmp, true /*preserve_o_regs*/);
423   }
424 
425   Register new_val = val;
426   if (needs_post_barrier) {
427     // G1 barrier needs uncompressed oop for region cross check.
428     if (UseCompressedOops && val != G0) {
429       new_val = tmp;
430       __ mov(val, new_val);
431     }
432   }
433 
434   BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
435 
436   if (needs_post_barrier) {
437     Register base = dst.base();
438     if (precise) {
439       if (!dst.has_index()) {
440         __ add(base, disp, base);
441       } else {
442         assert(!dst.has_disp(), "not supported yet");
443         __ add(base, index, base);
444       }
445     }
446     g1_write_barrier_post(masm, base, new_val, tmp);
447   }
448 }
449 
450 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
451                                     Address src, Register dst, Register tmp) {
452   bool on_oop = type == T_OBJECT || type == T_ARRAY;
453   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
454   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
455   bool on_reference = on_weak || on_phantom;
456   // Load the value of the referent field.
457   ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
458   if (on_oop && on_reference) {
459     // Generate the G1 pre-barrier code to log the value of
460     // the referent field in an SATB buffer. Note with
461     // these parameters the pre-barrier does not generate
462     // the load of the previous value
463 
464     Register pre_val = dst;
465     bool saved = false;
466     if (pre_val->is_in()) {
467       // The g1_write_barrier_pre method assumes that the pre_val
468       // is not in an input register.
469       __ save_frame_and_mov(0, pre_val, O0);
470       pre_val = O0;
471       saved = true;
472     }
473 
474     g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
475                          pre_val /* pre_val */,
476                          tmp /* tmp */,
477                          true /* preserve_o_regs */);
478 
479     if (saved) {
480       __ restore();
481     }
482   }
483 }
484 
485 void G1BarrierSetAssembler::barrier_stubs_init() {
486   if (dirty_card_log_enqueue == 0) {
487     G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
488     CardTable *ct = bs->card_table();
489     generate_dirty_card_log_enqueue(ct->byte_map_base());
490     assert(dirty_card_log_enqueue != 0, "postcondition.");
491   }
492   if (satb_log_enqueue_with_frame == 0) {
493     generate_satb_log_enqueue(true);
494     assert(satb_log_enqueue_with_frame != 0, "postcondition.");
495   }
496   if (satb_log_enqueue_frameless == 0) {
497     generate_satb_log_enqueue(false);
498     assert(satb_log_enqueue_frameless != 0, "postcondition.");
499   }
500 }
501 
502 #ifdef COMPILER1
503 
504 #undef __
505 #define __ ce->masm()->
506 
507 void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
508   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
509   // At this point we know that marking is in progress.
510   // If do_load() is true then we have to emit the
511   // load of the previous value; otherwise it has already
512   // been loaded into _pre_val.
513 
514   __ bind(*stub->entry());
515 
516   assert(stub->pre_val()->is_register(), "Precondition.");
517   Register pre_val_reg = stub->pre_val()->as_register();
518 
519   if (stub->do_load()) {
520     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
521   }
522 
523   if (__ is_in_wdisp16_range(*stub->continuation())) {
524     __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
525   } else {
526     __ cmp(pre_val_reg, G0);
527     __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
528   }
529   __ delayed()->nop();
530 
531   __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin());
532   __ delayed()->mov(pre_val_reg, G4);
533   __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
534   __ delayed()->nop();
535 }
536 
537 void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
538   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
539   __ bind(*stub->entry());
540 
541   assert(stub->addr()->is_register(), "Precondition.");
542   assert(stub->new_val()->is_register(), "Precondition.");
543   Register addr_reg = stub->addr()->as_pointer_register();
544   Register new_val_reg = stub->new_val()->as_register();
545 
546   if (__ is_in_wdisp16_range(*stub->continuation())) {
547     __ br_null(new_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
548   } else {
549     __ cmp(new_val_reg, G0);
550     __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
551   }
552   __ delayed()->nop();
553 
554   __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin());
555   __ delayed()->mov(addr_reg, G4);
556   __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
557   __ delayed()->nop();
558 }
559 
560 #undef __
561 #define __ sasm->
562 
563 void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
564   __ prologue("g1_pre_barrier", false);
565 
566   // G4: previous value of memory
567 
568   Register pre_val = G4;
569   Register tmp  = G1_scratch;
570   Register tmp2 = G3_scratch;
571 
572   Label refill, restart;
573   int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
574   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
575   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
576 
577   // Is marking still active?
578   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
579     __ ld(G2_thread, satb_q_active_byte_offset, tmp);
580   } else {
581     assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
582     __ ldsb(G2_thread, satb_q_active_byte_offset, tmp);
583   }
584   __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart);
585   __ retl();
586   __ delayed()->nop();
587 
588   __ bind(restart);
589   // Load the index into the SATB buffer. SATBMarkQueue::_index is a
590   // size_t so ld_ptr is appropriate
591   __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
592 
593   // index == 0?
594   __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);
595 
596   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
597   __ sub(tmp, oopSize, tmp);
598 
599   __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
600   // Use return-from-leaf
601   __ retl();
602   __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
603 
604   __ bind(refill);
605 
606   __ save_live_registers_no_oop_map(true);
607 
608   __ call_VM_leaf(L7_thread_cache,
609                   CAST_FROM_FN_PTR(address,
610                                    SATBMarkQueueSet::handle_zero_index_for_thread),
611                                    G2_thread);
612 
613   __ restore_live_registers(true);
614 
615   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
616   __ epilogue();
617 }
618 
619 void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
620   __ prologue("g1_post_barrier", false);
621 
622   G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
623 
624   Register addr = G4;
625   Register cardtable = G5;
626   Register tmp  = G1_scratch;
627   Register tmp2 = G3_scratch;
628   jbyte* byte_map_base = bs->card_table()->byte_map_base();
629 
630   Label not_already_dirty, restart, refill, young_card;
631 
632 #ifdef _LP64
633   __ srlx(addr, CardTable::card_shift, addr);
634 #else
635   __ srl(addr, CardTable::card_shift, addr);
636 #endif
637 
638   AddressLiteral rs((address)byte_map_base);
639   __ set(rs, cardtable);         // cardtable := <card table base>
640   __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
641 
642   __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
643 
644   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
645   __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
646 
647   assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
648   __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
649 
650   __ bind(young_card);
651   // We didn't take the branch, so we're already dirty: return.
652   // Use return-from-leaf
653   __ retl();
654   __ delayed()->nop();
655 
656   // Not dirty.
657   __ bind(not_already_dirty);
658 
659   // Get cardtable + tmp into a reg by itself
660   __ add(addr, cardtable, tmp2);
661 
662   // First, dirty it.
663   __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).
664 
665   Register tmp3 = cardtable;
666   Register tmp4 = tmp;
667 
668   // these registers are now dead
669   addr = cardtable = tmp = noreg;
670 
671   int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
672   int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
673 
674   __ bind(restart);
675 
676   // Get the index into the update buffer. DirtyCardQueue::_index is
677   // a size_t so ld_ptr is appropriate here.
678   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
679 
680   // index == 0?
681   __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);
682 
683   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
684   __ sub(tmp3, oopSize, tmp3);
685 
686   __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
687   // Use return-from-leaf
688   __ retl();
689   __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
690 
691   __ bind(refill);
692 
693   __ save_live_registers_no_oop_map(true);
694 
695   __ call_VM_leaf(L7_thread_cache,
696                   CAST_FROM_FN_PTR(address,
697                                    DirtyCardQueueSet::handle_zero_index_for_thread),
698                                    G2_thread);
699 
700   __ restore_live_registers(true);
701 
702   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
703   __ epilogue();
704 }
705 
706 #undef __
707 
708 #endif // COMPILER1