/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

#define __ masm->

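// Arraycopy pre-barrier: while SATB marking is active, the oops about to be
// overwritten in the destination array must be enqueued before the copy, so
// that concurrent marking still sees the snapshot-at-the-beginning values.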
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register from, Register to, Register count,
                                                            Register preserve1, Register preserve2) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized.
  if (!dest_uninitialized) {
    int spill_slots = 3;
    if (preserve1 != noreg) { spill_slots++; }
    if (preserve2 != noreg) { spill_slots++; }
    const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
    Label filtered;

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    }
    __ cmpdi(CCR0, R0, 0);
    __ beq(CCR0, filtered);

    __ save_LR_CR(R0);
    __ push_frame(frame_size, R0);
    int slot_nr = 0;
    __ std(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }

    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), to, count);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), to, count);
    }

    slot_nr = 0;
    __ ld(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
    __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
    __ restore_LR_CR(R0);

    __ bind(filtered);
  }
}

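// Arraycopy post-barrier: after the copy, the destination range may contain
// new cross-region references, so the address range is handed to the runtime,
// which dirties the covered cards. The live register passed in 'preserve'
// survives the C call in a stack slot of the temporary ABI frame.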
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register preserve) {
  int spill_slots = (preserve != noreg) ? 1 : 0;
  const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);

  __ save_LR_CR(R0);
  __ push_frame(frame_size, R0);
  if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), addr, count);
  if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ addi(R1_SP, R1_SP, frame_size); // pop_frame();
  __ restore_LR_CR(R0);
}

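// SATB pre-barrier: before a reference field is overwritten, its previous
// value is recorded in the thread-local SATB buffer (fast path) or handed to
// the runtime (slow path), so that concurrent marking operates on a
// consistent snapshot of the object graph. If 'obj' is noreg, the previous
// value has already been loaded into 'pre_val' by the caller.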
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
                                                 Register tmp1, Register tmp2, bool needs_frame) {
  bool not_null  = (decorators & OOP_NOT_NULL) != 0,
       preloaded = obj == noreg;
  Register nv_save = noreg;

  if (preloaded) {
    // We are not loading the previous value, so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp1, tmp2);
    if (pre_val->is_volatile()) {
      nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
      assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
    }
  }

  Label runtime, filtered;

  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  }
  __ cmpdi(CCR0, tmp1, 0);
  __ beq(CCR0, filtered);

  // Do we need to load the previous value?
  if (!preloaded) {
    // Load the previous value...
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, obj);
    } else {
      __ ld(pre_val, ind_or_offs, obj);
    }
    // Previous value has been loaded into pre_val.
  }
  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?
  if (preloaded && not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, pre_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 pre)", 0x321); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, pre_val, 0);
    __ beq(CCR0, filtered);
  }

  if (!preloaded && UseCompressedOops) {
    __ decode_heap_oop_not_null(pre_val);
  }

  // Not filtered, so we need to enqueue pre_val: try to store it into the
  // thread-local SATB buffer; if the buffer is full (index == 0), call into
  // the runtime instead.

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
  const Register Rbuffer = tmp1, Rindex = tmp2;

  __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rindex, 0);
  __ beq(CCR0, runtime); // If index == 0, goto runtime.
  __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);

  __ addi(Rindex, Rindex, -wordSize); // Decrement index.
  __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

  // Record the previous value.
  __ stdx(pre_val, Rbuffer, Rindex);
  __ b(filtered);

  __ bind(runtime);

  // May need to preserve LR. Also needed if the current frame is not compatible with the C calling convention.
  if (needs_frame) {
    __ save_LR_CR(tmp1);
    __ push_frame_reg_args(0, tmp2);
  }

  if (pre_val->is_volatile() && preloaded) { __ mr(nv_save, pre_val); } // Save pre_val across the C call if it was preloaded.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
  if (pre_val->is_volatile() && preloaded) { __ mr(pre_val, nv_save); } // Restore.

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR_CR(tmp1);
  }

  __ bind(filtered);
}

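// G1 post-barrier: after a reference store, decide whether the card covering
// the store address must be dirtied and enqueued for concurrent refinement.
// Stores within the same heap region, null stores, and stores whose card is
// already young or dirty are filtered out.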
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val,
                                                  Register tmp1, Register tmp2, Register tmp3) {
  bool not_null = (decorators & OOP_NOT_NULL) != 0;

  Label runtime, filtered;
  assert_different_registers(store_addr, new_val, tmp1, tmp2);

  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  // Does the store cross heap regions?
  if (G1RSBarrierRegionFilter) {
    __ xorr(tmp1, store_addr, new_val);
    __ srdi_(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
    __ beq(CCR0, filtered);
  }

  // Crosses regions, storing NULL?
  if (not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, new_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 post)", 0x322); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, new_val, 0);
    __ beq(CCR0, filtered);
  }

  // Storing region crossing non-NULL, is the card already dirty?
  const Register Rcard_addr = tmp1;
  Register Rbase = tmp2;
  __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);

  __ srdi(Rcard_addr, store_addr, CardTable::card_shift); // Card index relative to byte_map_base.

  // Load the card and filter if it is already marked young.
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
  __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
  __ beq(CCR0, filtered);

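  // StoreLoad fence: order the reference store with the card re-read below.
  // Without it, the store could become visible only after a concurrent
  // refinement thread has already cleaned and scanned the card, and the new
  // cross-region reference would be missed.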
  __ membar(Assembler::StoreLoad);
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);  // Reload after membar.
  __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
  __ beq(CCR0, filtered);

  // Storing a region crossing, non-NULL oop, card is clean.
  // Dirty card and log.
  __ li(tmp3, (int)G1CardTable::dirty_card_val());
  //release(); // G1: oops are allowed to get visible after dirty marking.
  __ stbx(tmp3, Rbase, Rcard_addr);

  __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
  Rbase = noreg; // End of lifetime.

  const Register Rqueue_index = tmp2,
                 Rqueue_buf   = tmp3;
  __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rqueue_index, 0);
  __ beq(CCR0, runtime); // If index == 0, goto runtime.
  __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);

  __ addi(Rqueue_index, Rqueue_index, -wordSize); // Decrement index.
  __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);

  __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // Store card.
  __ b(filtered);

  __ bind(runtime);

  // Save the live input values.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, R16_thread);

  __ bind(filtered);
}

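// Full G1 oop store: SATB pre-barrier, the store itself, then the post-barrier.
// For precise card marking (arrays and unknown oop references) the exact
// element address is dirtied; otherwise the object's base address is used.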
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register base, RegisterOrConstant ind_or_offs, Register val,
                                         Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  bool precise = on_array || on_anonymous;
  // Load and record the previous value.
  g1_write_barrier_pre(masm, decorators, base, ind_or_offs,
                       tmp1, tmp2, tmp3, needs_frame);

  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);

  // No need for a post barrier if storing NULL.
  if (val != noreg) {
    if (precise) {
      if (ind_or_offs.is_constant()) {
        __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
      } else {
        __ add(base, ind_or_offs.as_register(), base);
      }
    }
    g1_write_barrier_post(masm, decorators, base, val, tmp1, tmp2, tmp3);
  }
}

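// G1 load barrier for Reference.get() and similar accesses: a referent loaded
// through a weak or phantom reference must be recorded in the SATB buffer,
// otherwise concurrent marking could miss an object that the application has
// just made strongly reachable.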
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register base, RegisterOrConstant ind_or_offs, Register dst,
                                    Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  Label done;
  if (on_oop && on_reference && L_handle_null == NULL) { L_handle_null = &done; }
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst, tmp1, tmp2, needs_frame, L_handle_null);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of the referent
    // field in an SATB buffer. Note that with these parameters the
    // pre-barrier does not generate the load of the previous value;
    // we only reach here if the value is not null.
    g1_write_barrier_pre(masm, decorators | OOP_NOT_NULL, noreg /* obj */, (intptr_t)0, dst /* pre_val */,
                         tmp1, tmp2, needs_frame);
  }
  __ bind(done);
}

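// Resolve a jobject coming in from JNI: local and global handles are simply
// dereferenced, while jweak-tagged handles additionally need the SATB
// pre-barrier on the resolved value, because a weak handle does not keep its
// referent alive for the concurrent marker.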
void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame) {
  Label done, not_weak;
  __ cmpdi(CCR0, value, 0);
  __ beq(CCR0, done);         // Use NULL as-is.

  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
  __ andi_(tmp2, value, JNIHandles::weak_tag_mask);
  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.

  __ beq(CCR0, not_weak);     // Test for jweak tag.
  __ verify_oop(value);
  g1_write_barrier_pre(masm, IN_ROOT | ON_PHANTOM_OOP_REF,
                       noreg, noreg, value,
                       tmp1, tmp2, needs_frame);
  __ bind(not_weak);
  __ verify_oop(value);
  __ bind(done);
}


#ifdef COMPILER1

#undef __
#define __ ce->masm()->

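// C1 pre-barrier stub: entered from compiled code once the inline check has
// found marking active. Loads the previous value if required, filters null,
// and calls the runtime code blob generated below, passing pre_val in a
// stack slot at -8(R1_SP).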
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpdi(CCR0, pre_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->pre_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
  __ mtctr(R0);
  __ bctrl();
  __ b(*stub->continuation());
}

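// C1 post-barrier stub: entered when the inline card check fails. Filters
// null new values, then calls the runtime code blob generated below with the
// store address passed in R0.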
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  __ cmpdi(CCR0, new_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ mtctr(R0);
  __ mr(R0, addr_reg); // Pass addr in R0.
  __ bctrl();
  __ b(*stub->continuation());
}

#undef __
#define __ sasm->

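// Runtime code blob shared by all C1 pre-barrier stubs: re-checks that
// marking is still active, then pushes pre_val (handed over in a stack slot)
// into the thread-local SATB buffer, refilling the buffer through
// SATBMarkQueueSet::handle_zero_index_for_thread when it is full.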
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  BarrierSet* bs = BarrierSet::barrier_set();

  __ set_info("g1_pre_barrier_slow_id", false);

  // Using stack slots: pre_val (pre-pushed), spill tmp, spill tmp2.
  const int stack_slots = 3;
  Register pre_val = R0; // previous value of memory
  Register tmp  = R14;
  Register tmp2 = R15;

  Label refill, restart, marking_not_active;
  int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // Spill
  __ std(tmp, -16, R1_SP);
  __ std(tmp2, -24, R1_SP);

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp, satb_q_active_byte_offset, R16_thread);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp, satb_q_active_byte_offset, R16_thread);
  }
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, marking_not_active);

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t, so a 64-bit load (ld) is appropriate.
  __ ld(tmp, satb_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, refill);

  __ ld(tmp2, satb_q_buf_byte_offset, R16_thread);
  __ ld(pre_val, -8, R1_SP); // Load pre_val from the stack.
  __ addi(tmp, tmp, -oopSize);

  __ std(tmp, satb_q_index_byte_offset, R16_thread);
  __ stdx(pre_val, tmp2, tmp); // [_buf + index] := pre_val

  __ bind(marking_not_active);
  // Restore temp registers and return-from-leaf.
  __ ld(tmp2, -24, R1_SP);
  __ ld(tmp, -16, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

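// Runtime code blob shared by all C1 post-barrier stubs: computes the card
// for the store address passed in R0, filters young and already-dirty cards
// (with a StoreLoad fence before the re-read), dirties the card, and enqueues
// it in the thread-local dirty card queue, refilling the queue through
// DirtyCardQueueSet::handle_zero_index_for_thread when it is full.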
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  __ set_info("g1_post_barrier_slow_id", false);

  // Using stack slots: spill addr, spill tmp2.
  const int stack_slots = 2;
  Register tmp = R0;
  Register addr = R14;
  Register tmp2 = R15;
  jbyte* byte_map_base = bs->card_table()->byte_map_base();

  Label restart, refill, ret;

  // Spill
  __ std(addr, -8, R1_SP);
  __ std(tmp2, -16, R1_SP);

  __ srdi(addr, R0, CardTable::card_shift); // Addr is passed in R0.
  __ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
  __ add(addr, tmp2, addr);
  __ lbz(tmp, 0, addr); // tmp := card value at addr (= cardtable + card index)

  // Return if young card.
  __ cmpwi(CCR0, tmp, G1CardTable::g1_young_card_val());
  __ beq(CCR0, ret);

  // Return if the sequentially consistent card value is already dirty.
  __ membar(Assembler::StoreLoad);
  __ lbz(tmp, 0, addr); // tmp := card value, reloaded after the membar

  __ cmpwi(CCR0, tmp, G1CardTable::dirty_card_val());
  __ beq(CCR0, ret);

  // Not dirty.

  // First, dirty it.
  __ li(tmp, G1CardTable::dirty_card_val());
  __ stb(tmp, 0, addr);

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());

  __ bind(restart);

  // Get the index into the update buffer. DirtyCardQueue::_index is a
  // size_t, so a 64-bit load (ld) is appropriate here.
  __ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp2, 0);
  __ beq(CCR0, refill);

  __ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
  __ addi(tmp2, tmp2, -oopSize);

  __ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
  __ add(tmp2, tmp, tmp2);
  __ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>

  // Restore temp registers and return-from-leaf.
  __ bind(ret);
  __ ld(tmp2, -16, R1_SP);
  __ ld(addr, -8, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

#undef __

#endif // COMPILER1