1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "c1/c1_LIRAssembler.hpp"
  28 #include "c1/c1_MacroAssembler.hpp"
  29 #include "gc/g1/c1G1BSCodeGen.hpp"
  30 #include "gc/g1/g1BarrierSet.hpp"
  31 #include "gc/g1/g1CardTable.hpp"
  32 #include "gc/g1/g1BSCodeGen.hpp"
  33 #include "gc/g1/heapRegion.hpp"
  34 #include "gc/shared/collectedHeap.hpp"
  35 #include "runtime/thread.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 
  38 #define __ masm->
  39 
// Generates the pre-barrier for a reference-array copy: calls
// ModRefBarrierSet::static_write_ref_array_pre(addr, count) so the
// about-to-be-overwritten oops can be logged before the copy.
// Emitted only when the destination may already contain live oops.
void G1BSCodeGen::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) {
  bool dest_uninitialized = (decorators & DEST_NOT_INITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized
  // (there are no previous values to log).
  if (!dest_uninitialized) {
    // Open a new register window so we get fresh O-registers for the call.
    __ save_frame(0);
    // Save the necessary global regs... will be used after (the callee may
    // clobber globals; locals L0/L1 survive the call).
    if (addr->is_global()) {
      __ mov(addr, L0);
    }
    if (count->is_global()) {
      __ mov(count, L1);
    }
    // after_save() maps the caller's register to its name inside the new window.
    __ mov(addr->after_save(), O0);
    // Get the count into O1 (done in the branch delay slot below).
    __ call(CAST_FROM_FN_PTR(address, ModRefBarrierSet::static_write_ref_array_pre));
    __ delayed()->mov(count->after_save(), O1);
    // Restore any globals that were saved above.
    if (addr->is_global()) {
      __ mov(L0, addr);
    }
    if (count->is_global()) {
      __ mov(L1, count);
    }
    __ restore();
  }
}
  65 
// Generates the post-barrier for a reference-array copy: calls
// ModRefBarrierSet::static_write_ref_array_post(addr, count) to process
// the copied region. NOTE: tmp is unused in this SPARC implementation.
void G1BSCodeGen::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) {
  // Get some new fresh output registers.
  __ save_frame(0);
  __ mov(addr->after_save(), O0);
  // Count goes into O1 via the call's delay slot.
  __ call(CAST_FROM_FN_PTR(address, ModRefBarrierSet::static_write_ref_array_post));
  __ delayed()->mov(count->after_save(), O1);
  __ restore();
}
  74 
  75 #undef __
  76 
// Lazily generated SATB log-enqueue stubs (see generate_satb_log_enqueue):
// a flavor that builds its own register window, and a frameless flavor for
// callers that have already done a save_frame. The *_end pointers record
// where each generated stub finishes.
static address satb_log_enqueue_with_frame = NULL;
static u_char* satb_log_enqueue_with_frame_end = NULL;

static address satb_log_enqueue_frameless = NULL;
static u_char* satb_log_enqueue_frameless_end = NULL;

// Buffer size reserved for an enqueue stub, enlarged in debug builds.
// (Original note questioned the unit -- "Instructions?" -- presumably bytes,
// since it is passed to BufferBlob::create as a size.)
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
  84 
  85 static void generate_satb_log_enqueue(bool with_frame) {
  86   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
  87   CodeBuffer buf(bb);
  88   MacroAssembler masm(&buf);
  89 
  90 #define __ masm.
  91 
  92   address start = __ pc();
  93   Register pre_val;
  94 
  95   Label refill, restart;
  96   if (with_frame) {
  97     __ save_frame(0);
  98     pre_val = I0;  // Was O0 before the save.
  99   } else {
 100     pre_val = O0;
 101   }
 102 
 103   int satb_q_index_byte_offset =
 104     in_bytes(JavaThread::satb_mark_queue_offset() +
 105              SATBMarkQueue::byte_offset_of_index());
 106 
 107   int satb_q_buf_byte_offset =
 108     in_bytes(JavaThread::satb_mark_queue_offset() +
 109              SATBMarkQueue::byte_offset_of_buf());
 110 
 111   assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
 112          in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
 113          "check sizes in assembly below");
 114 
 115   __ bind(restart);
 116 
 117   // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
 118   // so ld_ptr is appropriate.
 119   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
 120 
 121   // index == 0?
 122   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
 123 
 124   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
 125   __ sub(L0, oopSize, L0);
 126 
 127   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
 128   if (!with_frame) {
 129     // Use return-from-leaf
 130     __ retl();
 131     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
 132   } else {
 133     // Not delayed.
 134     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
 135   }
 136   if (with_frame) {
 137     __ ret();
 138     __ delayed()->restore();
 139   }
 140   __ bind(refill);
 141 
 142   address handle_zero =
 143     CAST_FROM_FN_PTR(address,
 144                      &SATBMarkQueueSet::handle_zero_index_for_thread);
 145   // This should be rare enough that we can afford to save all the
 146   // scratch registers that the calling context might be using.
 147   __ mov(G1_scratch, L0);
 148   __ mov(G3_scratch, L1);
 149   __ mov(G4, L2);
 150   // We need the value of O0 above (for the write into the buffer), so we
 151   // save and restore it.
 152   __ mov(O0, L3);
 153   // Since the call will overwrite O7, we save and restore that, as well.
 154   __ mov(O7, L4);
 155   __ call_VM_leaf(L5, handle_zero, G2_thread);
 156   __ mov(L0, G1_scratch);
 157   __ mov(L1, G3_scratch);
 158   __ mov(L2, G4);
 159   __ mov(L3, O0);
 160   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
 161   __ delayed()->mov(L4, O7);
 162 
 163   if (with_frame) {
 164     satb_log_enqueue_with_frame = start;
 165     satb_log_enqueue_with_frame_end = __ pc();
 166   } else {
 167     satb_log_enqueue_frameless = start;
 168     satb_log_enqueue_frameless_end = __ pc();
 169   }
 170 
 171 #undef __
 172 }
 173 
 174 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
 175   if (with_frame) {
 176     if (satb_log_enqueue_with_frame == 0) {
 177       generate_satb_log_enqueue(with_frame);
 178       assert(satb_log_enqueue_with_frame != 0, "postcondition.");
 179     }
 180   } else {
 181     if (satb_log_enqueue_frameless == 0) {
 182       generate_satb_log_enqueue(with_frame);
 183       assert(satb_log_enqueue_frameless != 0, "postcondition.");
 184     }
 185   }
 186 }
 187 
 188 #define __ masm->
 189 
 190 void G1BSCodeGen::g1_write_barrier_pre(MacroAssembler* masm,
 191                                        Register obj,
 192                                        Register index,
 193                                        int offset,
 194                                        Register pre_val,
 195                                        Register tmp,
 196                                        bool preserve_o_regs) {
 197   Label filtered;
 198 
 199   if (obj == noreg) {
 200     // We are not loading the previous value so make
 201     // sure that we don't trash the value in pre_val
 202     // with the code below.
 203     assert_different_registers(pre_val, tmp);
 204   } else {
 205     // We will be loading the previous value
 206     // in this code so...
 207     assert(offset == 0 || index == noreg, "choose one");
 208     assert(pre_val == noreg, "check this code");
 209   }
 210 
 211   // Is marking active?
 212   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
 213     __ ld(G2,
 214           in_bytes(JavaThread::satb_mark_queue_offset() +
 215                    SATBMarkQueue::byte_offset_of_active()),
 216           tmp);
 217   } else {
 218     guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1,
 219               "Assumption");
 220     __ ldsb(G2,
 221             in_bytes(JavaThread::satb_mark_queue_offset() +
 222                      SATBMarkQueue::byte_offset_of_active()),
 223             tmp);
 224   }
 225 
 226   // Is marking active?
 227   __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 228 
 229   // Do we need to load the previous value?
 230   if (obj != noreg) {
 231     // Load the previous value...
 232     if (index == noreg) {
 233       if (Assembler::is_simm13(offset)) {
 234         __ load_heap_oop(obj, offset, tmp);
 235       } else {
 236         __ set(offset, tmp);
 237         __ load_heap_oop(obj, tmp, tmp);
 238       }
 239     } else {
 240       __ load_heap_oop(obj, index, tmp);
 241     }
 242     // Previous value has been loaded into tmp
 243     pre_val = tmp;
 244   }
 245 
 246   assert(pre_val != noreg, "must have a real register");
 247 
 248   // Is the previous value null?
 249   __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
 250 
 251   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
 252   // case, pre_val will be a scratch G-reg, but there are some cases in
 253   // which it's an O-reg.  In the first case, do a normal call.  In the
 254   // latter, do a save here and call the frameless version.
 255 
 256   guarantee(pre_val->is_global() || pre_val->is_out(),
 257             "Or we need to think harder.");
 258 
 259   if (pre_val->is_global() && !preserve_o_regs) {
 260     generate_satb_log_enqueue_if_necessary(true); // with frame
 261 
 262     __ call(satb_log_enqueue_with_frame);
 263     __ delayed()->mov(pre_val, O0);
 264   } else {
 265     generate_satb_log_enqueue_if_necessary(false); // frameless
 266 
 267     __ save_frame(0);
 268     __ call(satb_log_enqueue_frameless);
 269     __ delayed()->mov(pre_val->after_save(), O0);
 270     __ restore();
 271   }
 272 
 273   __ bind(filtered);
 274 }
 275 
 276 #undef __
 277 
// Lazily generated dirty-card log-enqueue stub and its end address
// (see generate_dirty_card_log_enqueue).
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
 280 
// This gets to assume that o0 contains the object address.
// Generates the dirty-card log-enqueue stub: computes the card index for
// the address in O0, filters young-generation and already-dirty cards,
// otherwise dirties the card and pushes its address onto the thread's
// dirty card queue, calling into the VM to refill a full queue.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);
#define __ masm.
  address start = __ pc();

  Label not_already_dirty, restart, refill, young_card;

  // O0 := card index of the store address.
#ifdef _LP64
  __ srlx(O0, CardTable::card_shift, O0);
#else
  __ srl(O0, CardTable::card_shift, O0);
#endif
  AddressLiteral addrlit((address)byte_map_base);
  __ set(addrlit, O1); // O1 := <card table base>
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  // Young cards need no logging.
  __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad membar, then re-read the card value before deciding.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get O0 + O1 into a reg by itself
  __ add(O0, O1, O3);

  // First, dirty it.
  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).

  // Byte offsets of the dirty card queue's fields within JavaThread.
  int dirty_card_q_index_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             DirtyCardQueue::byte_offset_of_index());
  int dirty_card_q_buf_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             DirtyCardQueue::byte_offset_of_buf());
  __ bind(restart);

  // Load the index into the update buffer. DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);

  // index == 0 means the buffer is full; go refill it.
  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);  // The index counts down in bytes.

  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (the card address)
  // Use return-from-leaf
  __ retl();
  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &DirtyCardQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  __ mov(G1_scratch, L3);
  __ mov(G3_scratch, L5);
  // We need the value of O3 above (for the write into the buffer), so we
  // save and restore it.
  __ mov(O3, L6);
  // Since the call will overwrite O7, we save and restore that, as well.
  __ mov(O7, L4);

  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
  __ mov(L3, G1_scratch);
  __ mov(L5, G3_scratch);
  __ mov(L6, O3);
  // Buffer refilled; retry the enqueue, restoring O7 in the delay slot.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ delayed()->mov(L4, O7);

  // Publish the generated stub's bounds.
  dirty_card_log_enqueue = start;
  dirty_card_log_enqueue_end = __ pc();
  // XXX Should have a guarantee here about not going off the end!
  // Does it already do so?  Do an experiment...

#undef __

}
 375 
 376 static inline void
 377 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
 378   if (dirty_card_log_enqueue == 0) {
 379     generate_dirty_card_log_enqueue(byte_map_base);
 380     assert(dirty_card_log_enqueue != 0, "postcondition.");
 381   }
 382 }
 383 
 384 #define __ masm->
 385 
// Generates the G1 post write barrier: filters same-region stores (when
// G1RSBarrierRegionFilter is on) and statically-null new values, then
// calls the dirty-card log-enqueue stub with the store address in O0.
void G1BSCodeGen::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
  Label filtered;
  MacroAssembler* post_filter_masm = masm;

  // Storing a statically-known null needs no post barrier.
  if (new_val == G0) return;

  G1BarrierSet* bs =
    barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());

  if (G1RSBarrierRegionFilter) {
    // tmp := store_addr XOR new_val, shifted by the region-size log; the
    // result is zero iff both addresses lie in the same heap region.
    __ xor3(store_addr, new_val, tmp);
#ifdef _LP64
    __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
    __ srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif

    __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
  }

  // If the "store_addr" register is an "in" or "local" register, move it to
  // a scratch reg so we can pass it as an argument.
  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  // Pick a scratch register different from "tmp".
  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
  // Make sure we use up the delay slot!
  if (use_scr) {
    post_filter_masm->mov(store_addr, scr);
  } else {
    post_filter_masm->nop();
  }
  generate_dirty_card_log_enqueue_if_necessary(bs->card_table()->byte_map_base());
  __ save_frame(0);
  __ call(dirty_card_log_enqueue);
  // The stub expects the store address in O0 (set in the call's delay slot).
  // scr is a G-register and so survives the save_frame above.
  if (use_scr) {
    __ delayed()->mov(scr, O0);
  } else {
    __ delayed()->mov(store_addr->after_save(), O0);
  }
  __ restore();

  __ bind(filtered);
}
 429 
// Emits an oop store with full G1 barriers: the SATB pre-barrier (which
// also loads the previous value into tmp), the raw store, then the
// card-marking post barrier. The post barrier is skipped when storing a
// statically-known null.
void G1BSCodeGen::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register base, Register index, int offset, Register val, Register tmp) {
  bool on_array = (decorators & ACCESS_ON_ARRAY) != 0;
  bool on_anonymous = (decorators & ACCESS_ON_ANONYMOUS) != 0;
  // For array and unknown-field stores the card of the exact element
  // address must be dirtied, not just the card of the object header.
  bool precise = on_array || on_anonymous;
  // Load and record the previous value.
  g1_write_barrier_pre(masm, base, index, offset,
                       noreg /* pre_val */,
                       tmp, true /*preserve_o_regs*/);

  // G1 barrier needs uncompressed oop for region cross check.
  // NOTE(review): tmp holds the uncompressed copy; presumably store_at
  // does not clobber tmp before g1_write_barrier_post runs -- TODO confirm.
  Register new_val = val;
  if (UseCompressedOops && val != G0) {
    new_val = tmp;
    __ mov(val, new_val);
  }

  BarrierSetCodeGen::store_at(masm, decorators, type, base, index, offset, val, tmp);

  // No need for post barrier if storing NULL
  if (val != G0) {
    if (precise) {
      // base is clobbered here to form the exact store address for the
      // post barrier.
      if (index == noreg) {
        __ add(base, offset, base);
      } else {
        __ add(base, index, base);
      }
    }
    g1_write_barrier_post(masm, base, new_val, tmp);
  }
}
 461 
 462 void G1BSCodeGen::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 463                           Register base, Register index, int offset, Register dst, Register tmp) {
 464   bool on_oop = type == T_OBJECT || type == T_ARRAY;
 465   bool on_weak = (decorators & GC_ACCESS_ON_WEAK) != 0;
 466   bool on_phantom = (decorators & GC_ACCESS_ON_PHANTOM) != 0;
 467   bool on_reference = on_weak || on_phantom;
 468   // Load the value of the referent field.
 469   ModRefBSCodeGen::load_at(masm, decorators, type, base, index, offset, dst, tmp);
 470   if (on_oop && on_reference) {
 471     // Generate the G1 pre-barrier code to log the value of
 472     // the referent field in an SATB buffer. Note with
 473     // these parameters the pre-barrier does not generate
 474     // the load of the previous value
 475 
 476     g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
 477                          dst /* pre_val */,
 478                          tmp /* tmp */,
 479                          true /* preserve_o_regs */);
 480   }
 481 }
 482 
 483 #undef __
 484 #define __ ce->masm()->
 485 
// C1: emits the slow-path code for a G1 pre-barrier stub. Optionally
// re-loads the previous value, filters null, and calls the shared
// pre-barrier runtime stub with the pre-value passed in G4.
void G1BSCodeGen::gen_g1_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  C1G1BSCodeGen* code_gen = (C1G1BSCodeGen*)Universe::heap()->barrier_set()->c1_code_gen();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(stub->_entry);

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // A null previous value needs no logging. Use the short branch form
  // only when the continuation is within wdisp16 range.
  if (__ is_in_wdisp16_range(stub->_continuation)) {
    __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, stub->_continuation);
  } else {
    __ cmp(pre_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, stub->_continuation);
  }
  __ delayed()->nop();

  // The runtime stub expects the pre-value in G4 (set in the delay slot).
  __ call(code_gen->pre_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(pre_val_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, stub->_continuation);
  __ delayed()->nop();
}
 515 
// C1: emits the slow-path code for a G1 post-barrier stub. Filters null
// new values, then calls the shared post-barrier runtime stub with the
// store address passed in G4.
void G1BSCodeGen::gen_g1_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  C1G1BSCodeGen* code_gen = (C1G1BSCodeGen*)Universe::heap()->barrier_set()->c1_code_gen();
  __ bind(stub->_entry);

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  // Storing null needs no post barrier. Use the short branch form only
  // when the continuation is within wdisp16 range.
  if (__ is_in_wdisp16_range(stub->_continuation)) {
    __ br_null(new_val_reg, /*annul*/false, Assembler::pt, stub->_continuation);
  } else {
    __ cmp(new_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, stub->_continuation);
  }
  __ delayed()->nop();

  // The runtime stub expects the store address in G4 (set in the delay slot).
  __ call(code_gen->post_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(addr_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, stub->_continuation);
  __ delayed()->nop();
}
 538 
 539 #undef __
 540 #define __ sasm->
 541 
// C1 runtime slow-path stub for the G1 pre-barrier. Expects the previous
// value of memory in G4; re-checks that marking is active, then enqueues
// the value on the thread's SATB buffer, spilling live registers around
// the buffer-refill VM call.
void G1BSCodeGen::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // G4: previous value of memory

  Register pre_val = G4;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;

  Label refill, restart;
  // Byte offsets of the SATB queue's fields within JavaThread.
  int satb_q_active_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             SATBMarkQueue::byte_offset_of_active());
  int satb_q_index_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             SATBMarkQueue::byte_offset_of_index());
  int satb_q_buf_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             SATBMarkQueue::byte_offset_of_buf());

  // Is marking still active? (Load width matches the field width.)
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2_thread, satb_q_active_byte_offset, tmp);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2_thread, satb_q_active_byte_offset, tmp);
  }
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart);
  // Marking not active: nothing to log, return from leaf.
  __ retl();
  __ delayed()->nop();

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t so ld_ptr is appropriate
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);

  // index == 0 means the buffer is full; go refill it.
  __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
  __ sub(tmp, oopSize, tmp);  // The index counts down in bytes.

  __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := pre_val
  // Use return-from-leaf
  __ retl();
  __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);

  __ bind(refill);

  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   SATBMarkQueueSet::handle_zero_index_for_thread),
                                   G2_thread);

  __ restore_live_registers(true);

  // Buffer refilled; retry the enqueue.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
 603 
// C1 runtime slow-path stub for the G1 post barrier. Expects the store
// address in G4; computes the card, filters young and already-dirty
// cards, otherwise dirties the card and enqueues its address on the
// thread's dirty card queue, refilling the queue via a VM call when full.
void G1BSCodeGen::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());

  Register addr = G4;
  Register cardtable = G5;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;
  jbyte* byte_map_base = bs->card_table()->byte_map_base();

  Label not_already_dirty, restart, refill, young_card;

  // addr := card index of the store address.
#ifdef _LP64
  __ srlx(addr, CardTable::card_shift, addr);
#else
  __ srl(addr, CardTable::card_shift, addr);
#endif

  AddressLiteral rs((address)byte_map_base);
  __ set(rs, cardtable);         // cardtable := <card table base>
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  // Young cards need no logging.
  __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad membar, then re-read the card value before deciding.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get cardtable + tmp into a reg by itself
  __ add(addr, cardtable, tmp2);

  // First, dirty it.
  __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).

  Register tmp3 = cardtable;
  Register tmp4 = tmp;

  // these registers are now dead
  addr = cardtable = tmp = noreg;

  // Byte offsets of the dirty card queue's fields within JavaThread.
  int dirty_card_q_index_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             DirtyCardQueue::byte_offset_of_index());
  int dirty_card_q_buf_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             DirtyCardQueue::byte_offset_of_buf());

  __ bind(restart);

  // Get the index into the update buffer. DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);

  // index == 0 means the buffer is full; go refill it.
  __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
  __ sub(tmp3, oopSize, tmp3);  // The index counts down in bytes.

  __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
  // Use return-from-leaf
  __ retl();
  __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);

  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   DirtyCardQueueSet::handle_zero_index_for_thread),
                                   G2_thread);

  __ restore_live_registers(true);

  // Buffer refilled; retry the enqueue.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
 694 
 695 #undef __