/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

#define __ masm->

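// Arraycopy pre-barrier: if SATB marking is active, call into the G1 runtime
// to enqueue all oops about to be overwritten in the destination range.
// from/to/count and up to two extra registers are spilled around the call.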
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register from, Register to, Register count,
                                                            Register preserve1, Register preserve2) {
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized.
  if (!dest_uninitialized) {
    int spill_slots = 3;
    if (preserve1 != noreg) { spill_slots++; }
    if (preserve2 != noreg) { spill_slots++; }
    const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
    Label filtered;

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
    }
    __ cmpdi(CCR0, R0, 0);
    __ beq(CCR0, filtered);

    __ save_LR_CR(R0);
    __ push_frame(frame_size, R0);
    int slot_nr = 0;
    __ std(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }

    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), to, count);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), to, count);
    }

    slot_nr = 0;
    __ ld(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
    __ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
    if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
    if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
    __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
    __ restore_LR_CR(R0);

    __ bind(filtered);
  }
}

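// Arraycopy post-barrier: hand the covered address range to the G1 runtime,
// which dirties and enqueues the corresponding cards.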
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register preserve) {
  int spill_slots = (preserve != noreg) ? 1 : 0;
  const int frame_size = align_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);

  __ save_LR_CR(R0);
  __ push_frame(frame_size, R0);
  if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), addr, count);
  if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
  __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
  __ restore_LR_CR(R0);
}

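// SATB pre-barrier for a single reference store. If marking is active, record
// the previous value (loaded from obj + ind_or_offs unless it was preloaded
// into pre_val) in the thread-local SATB buffer; if the buffer is full
// (index == 0), call into the runtime instead.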
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
                                                 Register tmp1, Register tmp2, bool needs_frame) {
  bool not_null  = (decorators & IS_NOT_NULL) != 0,
       preloaded = obj == noreg;
  Register nv_save = noreg;

  if (preloaded) {
    // We are not loading the previous value, so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp1, tmp2);
    if (pre_val->is_volatile()) {
      nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
      assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
    }
  }

  Label runtime, filtered;

  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
  }
  __ cmpdi(CCR0, tmp1, 0);
  __ beq(CCR0, filtered);

  // Do we need to load the previous value?
  if (!preloaded) {
    // Load the previous value...
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, obj);
    } else {
      __ ld(pre_val, ind_or_offs, obj);
    }
    // Previous value has been loaded into pre_val.
  }
  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?
  if (preloaded && not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, pre_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 pre)", 0x321); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, pre_val, 0);
    __ beq(CCR0, filtered);
  }

  if (!preloaded && UseCompressedOops) {
    __ decode_heap_oop_not_null(pre_val);
  }

  // Not filtered, so we need to enqueue the previous value. Try the fast path
  // first: store it directly into the thread-local SATB buffer.

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
  const Register Rbuffer = tmp1, Rindex = tmp2;

  __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rindex, 0);
  __ beq(CCR0, runtime); // If index == 0, goto runtime.
  __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);

  __ addi(Rindex, Rindex, -wordSize); // Decrement index.
  __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);

  // Record the previous value.
  __ stdx(pre_val, Rbuffer, Rindex);
  __ b(filtered);

  __ bind(runtime);

  // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
  if (needs_frame) {
    __ save_LR_CR(tmp1);
    __ push_frame_reg_args(0, tmp2);
  }

  if (pre_val->is_volatile() && preloaded) { __ mr(nv_save, pre_val); } // Save pre_val across C call if it was preloaded.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
  if (pre_val->is_volatile() && preloaded) { __ mr(pre_val, nv_save); } // restore

  if (needs_frame) {
    __ pop_frame();
    __ restore_LR_CR(tmp1);
  }

  __ bind(filtered);
}

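// Card-marking post-barrier for a single reference store. Same-region stores,
// NULL stores, and young or already-dirty cards are filtered out; otherwise
// the card is dirtied and its address enqueued in the thread-local dirty card
// queue, with a runtime call when the queue is full.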
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val,
                                                  Register tmp1, Register tmp2, Register tmp3) {
  bool not_null = (decorators & IS_NOT_NULL) != 0;

  Label runtime, filtered;
  assert_different_registers(store_addr, new_val, tmp1, tmp2);

  CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");

  // Does the store cross heap regions?
  __ xorr(tmp1, store_addr, new_val);
  __ srdi_(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  __ beq(CCR0, filtered);

  // Crosses regions, storing NULL?
  if (not_null) {
#ifdef ASSERT
    __ cmpdi(CCR0, new_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 post)", 0x322); // Checked by caller.
#endif
  } else {
    __ cmpdi(CCR0, new_val, 0);
    __ beq(CCR0, filtered);
  }

  // Storing region-crossing non-NULL, is the card already dirty?
  const Register Rcard_addr = tmp1;
  Register Rbase = tmp2;
  __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);

  __ srdi(Rcard_addr, store_addr, CardTable::card_shift);

  // Get the address of the card.
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
  __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
  __ beq(CCR0, filtered);

  __ membar(Assembler::StoreLoad);
  __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);  // Reload after membar.
  __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
  __ beq(CCR0, filtered);

  // Storing a region-crossing non-NULL oop and the card is clean.
  // Dirty the card and log it.
  __ li(tmp3, (int)G1CardTable::dirty_card_val());
  //release(); // G1: oops are allowed to become visible after dirty marking.
  __ stbx(tmp3, Rbase, Rcard_addr);

  __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
  Rbase = noreg; // end of lifetime

  const Register Rqueue_index = tmp2,
                 Rqueue_buf   = tmp3;
  __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
  __ cmpdi(CCR0, Rqueue_index, 0);
  __ beq(CCR0, runtime); // If index == 0, goto runtime.
  __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);

  __ addi(Rqueue_index, Rqueue_index, -wordSize); // Decrement index.
  __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);

  __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // Store card.
  __ b(filtered);

  __ bind(runtime);

  // Call the runtime to enqueue the card address.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, R16_thread);

  __ bind(filtered);
}

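// Complete G1 oop store: SATB pre-barrier, the store itself, and the
// card-marking post-barrier. For precise (array or unknown-reference) stores,
// the exact element address is computed into 'base' before the post-barrier.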
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register base, RegisterOrConstant ind_or_offs, Register val,
                                         Register tmp1, Register tmp2, Register tmp3, bool needs_frame) {
  bool is_array = (decorators & IS_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  bool precise = is_array || on_anonymous;
  // Load and record the previous value.
  g1_write_barrier_pre(masm, decorators, base, ind_or_offs,
                       tmp1, tmp2, tmp3, needs_frame);

  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, needs_frame);

  // No need for post barrier if storing NULL.
  if (val != noreg) {
    if (precise) {
      if (ind_or_offs.is_constant()) {
        __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
      } else {
        __ add(base, ind_or_offs.as_register(), base);
      }
    }
    g1_write_barrier_post(masm, decorators, base, val, tmp1, tmp2, tmp3);
  }
}

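// Oop load. For reference loads decorated with ON_WEAK_OOP_REF or
// ON_PHANTOM_OOP_REF, the loaded referent must be logged in the SATB buffer
// to keep it alive with respect to concurrent marking, so a pre-barrier with
// a preloaded, known non-NULL pre_val is emitted after the load.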
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register base, RegisterOrConstant ind_or_offs, Register dst,
                                    Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  Label done;
  if (on_oop && on_reference && L_handle_null == NULL) { L_handle_null = &done; }
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst, tmp1, tmp2, needs_frame, L_handle_null);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer. Note that with
    // these parameters the pre-barrier does not generate
    // the load of the previous value.
    // We only reach here if the value is not null.
    g1_write_barrier_pre(masm, decorators | IS_NOT_NULL, noreg /* obj */, (intptr_t)0, dst /* pre_val */,
                         tmp1, tmp2, needs_frame);
  }
  __ bind(done);
}

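// Resolve a jobject. Weak handles (tagged with JNIHandles::weak_tag_mask)
// additionally get a SATB pre-barrier on the resolved value so that a
// concurrent mark cannot miss it.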
void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, bool needs_frame) {
  Label done, not_weak;
  __ cmpdi(CCR0, value, 0);
  __ beq(CCR0, done);         // Use NULL as-is.

  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
  __ andi_(tmp2, value, JNIHandles::weak_tag_mask);
  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.

  __ beq(CCR0, not_weak);     // Test for jweak tag.
  __ verify_oop(value);
  g1_write_barrier_pre(masm, IN_NATIVE | ON_PHANTOM_OOP_REF,
                       noreg, noreg, value,
                       tmp1, tmp2, needs_frame);
  __ bind(not_weak);
  __ verify_oop(value);
  __ bind(done);
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

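// C1 slow-path stub for the SATB pre-barrier. Marking is known to be active;
// the stub loads pre_val if requested, filters NULL, and calls the shared
// pre-barrier runtime code blob with pre_val passed on the stack.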
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpdi(CCR0, pre_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->pre_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
  __ mtctr(R0);
  __ bctrl();
  __ b(*stub->continuation());
}

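// C1 slow-path stub for the card-marking post-barrier. Filters NULL new
// values and calls the shared post-barrier runtime code blob with the store
// address passed in R0.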
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  __ cmpdi(CCR0, new_val_reg, 0);
  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());

  address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
  //__ load_const_optimized(R0, c_code);
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
  __ mtctr(R0);
  __ mr(R0, addr_reg); // Pass addr in R0.
  __ bctrl();
  __ b(*stub->continuation());
}

#undef __
#define __ sasm->

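// Runtime code blob behind the C1 pre-barrier slow path. Re-checks that
// marking is still active, then pushes pre_val (found at -8(R1_SP)) into the
// thread's SATB buffer. On buffer overflow, all volatile registers are saved
// and the SATB queue set is asked to install a fresh buffer.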
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  BarrierSet* bs = BarrierSet::barrier_set();

  __ set_info("g1_pre_barrier_slow_id", false);

  // Using stack slots: pre_val (pre-pushed), spill tmp, spill tmp2.
  const int stack_slots = 3;
  Register pre_val = R0; // previous value of memory
  Register tmp  = R14;
  Register tmp2 = R15;

  Label refill, restart, marking_not_active;
  int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // Spill
  __ std(tmp, -16, R1_SP);
  __ std(tmp2, -24, R1_SP);

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ lwz(tmp, satb_q_active_byte_offset, R16_thread);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ lbz(tmp, satb_q_active_byte_offset, R16_thread);
  }
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, marking_not_active);

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t, so a 64-bit load is appropriate.
  __ ld(tmp, satb_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp, 0);
  __ beq(CCR0, refill);

  __ ld(tmp2, satb_q_buf_byte_offset, R16_thread);
  __ ld(pre_val, -8, R1_SP); // Load from stack.
  __ addi(tmp, tmp, -oopSize);

  __ std(tmp, satb_q_index_byte_offset, R16_thread);
  __ stdx(pre_val, tmp2, tmp); // [_buf + index] := pre_val

  __ bind(marking_not_active);
  // Restore temp registers and return-from-leaf.
  __ ld(tmp2, -24, R1_SP);
  __ ld(tmp, -16, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1SATBMarkQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

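// Runtime code blob behind the C1 post-barrier slow path. Computes the card
// for the address passed in R0, filters young and already-dirty cards, then
// dirties the card and enqueues its address in the thread's dirty card queue.
// On queue overflow, the dirty card queue set installs a fresh buffer.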
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  __ set_info("g1_post_barrier_slow_id", false);

  // Using stack slots: spill addr, spill tmp2
  const int stack_slots = 2;
  Register tmp = R0;
  Register addr = R14;
  Register tmp2 = R15;
  jbyte* byte_map_base = bs->card_table()->byte_map_base();

  Label restart, refill, ret;

  // Spill
  __ std(addr, -8, R1_SP);
  __ std(tmp2, -16, R1_SP);

  __ srdi(addr, R0, CardTable::card_shift); // Addr is passed in R0.
  __ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
  __ add(addr, tmp2, addr);
  __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]

  // Return if young card.
  __ cmpwi(CCR0, tmp, G1CardTable::g1_young_card_val());
  __ beq(CCR0, ret);

  // Return if the sequentially consistent value is already dirty.
  __ membar(Assembler::StoreLoad);
  __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]

  __ cmpwi(CCR0, tmp, G1CardTable::dirty_card_val());
  __ beq(CCR0, ret);

  // Not dirty.

  // First, dirty it.
  __ li(tmp, G1CardTable::dirty_card_val());
  __ stb(tmp, 0, addr);

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());

  __ bind(restart);

  // Get the index into the update buffer. G1DirtyCardQueue::_index is
  // a size_t, so a 64-bit load is appropriate.
  __ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);

  // index == 0?
  __ cmpdi(CCR0, tmp2, 0);
  __ beq(CCR0, refill);

  __ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
  __ addi(tmp2, tmp2, -oopSize);

  __ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
  __ add(tmp2, tmp, tmp2);
  __ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>

  // Restore temp registers and return-from-leaf.
  __ bind(ret);
  __ ld(tmp2, -16, R1_SP);
  __ ld(addr, -8, R1_SP);
  __ blr();

  __ bind(refill);
  const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
  __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ mflr(R0);
  __ std(R0, _abi(lr), R1_SP);
  __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
  __ pop_frame();
  __ ld(R0, _abi(lr), R1_SP);
  __ mtlr(R0);
  __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
  __ b(restart);
}

#undef __

#endif // COMPILER1