/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

#define __ masm->
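// Arraycopy pre-barrier. G1's SATB (snapshot-at-the-beginning) invariant
// requires that, while concurrent marking is active, the old values of a
// destination range be logged before they are overwritten. The range is
// handed to a runtime leaf call which enqueues each old oop.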
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register from, Register to, Register count,
                                                            Register preserve1, Register preserve2) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized.
  if (!dest_uninitialized) {
    int spill_slots = 3;
    if (preserve1 != noreg) { spill_slots++; }
    if (preserve2 != noreg) { spill_slots++; }
    const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
    Label filtered;

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    }
    __ cmpdi(CCR0, R0, 0);
    __ beq(CCR0, filtered);

    __ save_LR_CR(R0);
    __ push_frame(frame_size, R0);
    int slot_nr = 0;
    __ std(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }

    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry), to, count);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry), to, count);
    }

    slot_nr = 0;
    __ ld(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
    __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
    __ restore_LR_CR(R0);

    __ bind(filtered);
  }
}

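// Arraycopy post-barrier: after the copy, all cards spanned by the
// destination range must be recorded. This is delegated entirely to the
// runtime; only LR/CR and one caller-specified register are preserved.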
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register preserve) {
  int spill_slots = (preserve != noreg) ? 1 : 0;
  const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);

  __ save_LR_CR(R0);
  __ push_frame(frame_size, R0);
  if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry), addr, count);
  if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ addi(R1_SP, R1_SP, frame_size); // pop_frame();
  __ restore_LR_CR(R0);
}

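// Per-store SATB pre-barrier. Fast path: (1) skip everything if marking is
// inactive, (2) load the previous value unless the caller preloaded it
// (obj == noreg), (3) skip null previous values, (4) enqueue the previous
// value into the thread-local SATB buffer. Only a full buffer (index == 0)
// goes to the runtime.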
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
                                                 Register tmp1, Register tmp2, bool needs_frame) {
  bool not_null  = (decorators & OOP_NOT_NULL) != 0,
       preloaded = obj == noreg;
  Register nv_save = noreg;

  if (preloaded) {
    // We are not loading the previous value so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp1, tmp2);
    if (pre_val->is_volatile()) {
      nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
      assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
    }
  }

  Label runtime, filtered;

  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  }
  __ cmpdi(CCR0, tmp1, 0);
  __ beq(CCR0, filtered);

  // Do we need to load the previous value?
  if (!preloaded) {
    // Load the previous value...
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, obj);
    } else {
      __ ld(pre_val, ind_or_offs, obj);
    }
    // Previous value has been loaded into pre_val.
  }
  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?
  if (preloaded && not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, pre_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 pre)", 0x321); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, pre_val, 0);
    __ beq(CCR0, filtered);
  }

  if (!preloaded && UseCompressedOops) {
    __ decode_heap_oop_not_null(pre_val);
  }

  // Not filtered, so the previous value must be enqueued.

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
  const Register Rbuffer = tmp1, Rindex = tmp2;

  __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rindex, 0);
  __ beq(CCR0, runtime); // If index == 0, goto runtime.
  __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);

  __ addi(Rindex, Rindex, -wordSize); // Decrement index.
  __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

  // Record the previous value.
  __ stdx(pre_val, Rbuffer, Rindex);
  __ b(filtered);

  __ bind(runtime);

  // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
  if (needs_frame) {
    __ save_LR_CR(tmp1);
    __ push_frame_reg_args(0, tmp2);
  }

  if (pre_val->is_volatile() && preloaded) { __ mr(nv_save, pre_val); } // Save pre_val across C call if it was preloaded.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, R16_thread);
  if (pre_val->is_volatile() && preloaded) { __ mr(pre_val, nv_save); } // restore

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR_CR(tmp1);
  }

  __ bind(filtered);
}

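// Per-store post-barrier. Stores are filtered out if they cannot create a
// cross-region reference (store within one region, or storing null), if
// they hit a young region (cards stay at the special young value), or if
// the card is already dirty. Otherwise the card is dirtied and enqueued on
// the thread's dirty card queue for concurrent refinement.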
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val,
                                                  Register tmp1, Register tmp2, Register tmp3) {
  bool not_null = (decorators & OOP_NOT_NULL) != 0;

  Label runtime, filtered;
  assert_different_registers(store_addr, new_val, tmp1, tmp2);

  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  // Does store cross heap regions?
  if (G1RSBarrierRegionFilter) {
    __ xorr(tmp1, store_addr, new_val);
    __ srdi_(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
    __ beq(CCR0, filtered);
  }

  // Crosses regions, storing NULL?
  if (not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, new_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 post)", 0x322); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, new_val, 0);
    __ beq(CCR0, filtered);
  }

  // Storing region crossing non-NULL, is card already dirty?
  const Register Rcard_addr = tmp1;
  Register Rbase = tmp2;
  __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);

  __ srdi(Rcard_addr, store_addr, CardTable::card_shift);

  // Get the address of the card.
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
  __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
  __ beq(CCR0, filtered);

  __ membar(Assembler::StoreLoad);
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);  // Reload after membar.
  __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
  __ beq(CCR0, filtered);

  // Storing a region crossing, non-NULL oop, card is clean.
  // Dirty card and log.
  __ li(tmp3, (int)G1CardTable::dirty_card_val());
  //release(); // G1: oops are allowed to get visible after dirty marking.
  __ stbx(tmp3, Rbase, Rcard_addr);

  __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
  Rbase = noreg; // end of lifetime

  const Register Rqueue_index = tmp2,
                 Rqueue_buf   = tmp3;
  __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rqueue_index, 0);
  __ beq(CCR0, runtime); // index == 0 then jump to runtime
  __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);

  __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
  __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);

  __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
  __ b(filtered);

  __ bind(runtime);

  // Save the live input values.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread);

  __ bind(filtered);
}

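// Complete G1 oop store: SATB pre-barrier, the store itself, card table
// post-barrier. For "precise" stores (array elements and unknown oop
// references) the card must be computed from the exact element address, so
// ind_or_offs is folded into base first; for ordinary field stores the
// card of the object start suffices.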
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register base, RegisterOrConstant ind_or_offs, Register val,
                                         Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  bool precise = on_array || on_anonymous;
  // Load and record the previous value.
  g1_write_barrier_pre(masm, decorators, base, ind_or_offs,
                       tmp1, tmp2, tmp3, needs_frame);

  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);

  // No need for post barrier if storing NULL
  if (val != noreg) {
    if (precise) {
      if (ind_or_offs.is_constant()) {
        __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
      } else {
        __ add(base, ind_or_offs.as_register(), base);
      }
    }
    g1_write_barrier_post(masm, decorators, base, val, tmp1, tmp2, tmp3);
  }
}

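// G1-aware oop load. For loads through weak or phantom references (e.g.
// Reference.get intrinsics), the loaded referent has to be logged via the
// SATB pre-barrier so concurrent marking treats it as live; null referents
// are filtered out through L_handle_null.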
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register base, RegisterOrConstant ind_or_offs, Register dst,
                                    Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  Label done;
  if (on_oop && on_reference && L_handle_null == NULL) { L_handle_null = &done; }
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst, tmp1, tmp2, needs_frame, L_handle_null);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer. Note that with
    // these parameters the pre-barrier does not generate
    // the load of the previous value.
    // We only reach here if the value is not null.
    g1_write_barrier_pre(masm, decorators | OOP_NOT_NULL, noreg /* obj */, (intptr_t)0, dst /* pre_val */,
                         tmp1, tmp2, needs_frame);
  }
  __ bind(done);
}

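// Resolve a jobject to an oop. jweak handles carry a tag in the low bits;
// a weak handle's referent must additionally pass through the pre-barrier
// (decorated as a phantom reference) so it is kept alive for the native
// caller.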
void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame) {
  Label done, not_weak;
  __ cmpdi(CCR0, value, 0);
  __ beq(CCR0, done);         // Use NULL as-is.

  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
  __ andi_(tmp2, value, JNIHandles::weak_tag_mask);
  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.

  __ beq(CCR0, not_weak);     // Test for jweak tag.
  __ verify_oop(value);
  g1_write_barrier_pre(masm, IN_ROOT | ON_PHANTOM_OOP_REF,
                       noreg, noreg, value,
                       tmp1, tmp2, needs_frame);
  __ bind(not_weak);
  __ verify_oop(value);
  __ bind(done);
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

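// C1 out-of-line slow path for the pre-barrier. The inline fast path
// (emitted by G1BarrierSetC1) has already established that marking is in
// progress. This stub loads the previous value if requested, filters null,
// and calls the shared runtime code blob, passing pre_val on the stack.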
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpdi(CCR0, pre_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->pre_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
  __ mtctr(R0);
  __ bctrl();
  __ b(*stub->continuation());
}

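// C1 out-of-line slow path for the post-barrier: filters a null new value,
// then calls the shared runtime code blob with the store address passed
// in R0.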
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  __ cmpdi(CCR0, new_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ mtctr(R0);
  __ mr(R0, addr_reg); // Pass addr in R0.
  __ bctrl();
  __ b(*stub->continuation());
}

#undef __
#define __ sasm->

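// Runtime code blob behind the C1 pre-barrier slow path. Contract with
// gen_pre_barrier_stub above: pre_val was stored at -8(R1_SP) by the call
// site; tmp/tmp2 are spilled below that. A full SATB buffer is swapped for
// a fresh one via SATBMarkQueueSet::handle_zero_index_for_thread (with all
// volatile GPRs saved around the call), then the enqueue is retried.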
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  BarrierSet* bs = BarrierSet::barrier_set();

  __ set_info("g1_pre_barrier_slow_id", false);

  // Using stack slots: pre_val (pre-pushed), spill tmp, spill tmp2.
  const int stack_slots = 3;
  Register pre_val = R0; // previous value of memory
  Register tmp  = R14;
  Register tmp2 = R15;

  Label refill, restart, marking_not_active;
  int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // Spill
  __ std(tmp, -16, R1_SP);
  __ std(tmp2, -24, R1_SP);

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp, satb_q_active_byte_offset, R16_thread);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp, satb_q_active_byte_offset, R16_thread);
  }
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, marking_not_active);

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t so ld_ptr is appropriate.
  __ ld(tmp, satb_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, refill);

  __ ld(tmp2, satb_q_buf_byte_offset, R16_thread);
  __ ld(pre_val, -8, R1_SP); // Load from stack.
  __ addi(tmp, tmp, -oopSize);

  __ std(tmp, satb_q_index_byte_offset, R16_thread);
  __ stdx(pre_val, tmp2, tmp); // [_buf + index] := pre_val

  __ bind(marking_not_active);
  // Restore temp registers and return-from-leaf.
  __ ld(tmp2, -24, R1_SP);
  __ ld(tmp, -16, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

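// Runtime code blob behind the C1 post-barrier slow path. Contract with
// gen_post_barrier_stub above: the store address arrives in R0. The stub
// computes the card address, repeats the young/dirty filtering (the card
// may have been dirtied concurrently), dirties and enqueues the card, and
// refills a full dirty card queue via
// DirtyCardQueueSet::handle_zero_index_for_thread.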
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  __ set_info("g1_post_barrier_slow_id", false);

  // Using stack slots: spill addr, spill tmp2
  const int stack_slots = 2;
  Register tmp = R0;
  Register addr = R14;
  Register tmp2 = R15;
  jbyte* byte_map_base = bs->card_table()->byte_map_base();

  Label restart, refill, ret;

  // Spill
  __ std(addr, -8, R1_SP);
  __ std(tmp2, -16, R1_SP);

  __ srdi(addr, R0, CardTable::card_shift); // Addr is passed in R0.
  __ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
  __ add(addr, tmp2, addr);
  __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]

  // Return if young card.
  __ cmpwi(CCR0, tmp, G1CardTable::g1_young_card_val());
  __ beq(CCR0, ret);

  // Return if sequentially consistent value is already dirty.
  __ membar(Assembler::StoreLoad);
  __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]

  __ cmpwi(CCR0, tmp, G1CardTable::dirty_card_val());
  __ beq(CCR0, ret);

  // Not dirty.

  // First, dirty it.
  __ li(tmp, G1CardTable::dirty_card_val());
  __ stb(tmp, 0, addr);

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());

  __ bind(restart);

  // Get the index into the update buffer. DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp2, 0);
  __ beq(CCR0, refill);

  __ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
  __ addi(tmp2, tmp2, -oopSize);

  __ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
  __ add(tmp2, tmp, tmp2);
  __ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>

  // Restore temp registers and return-from-leaf.
  __ bind(ret);
  __ ld(tmp2, -16, R1_SP);
  __ ld(addr, -8, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

#undef __

#endif // COMPILER1