1 /*
   2  * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "gc/g1/g1BarrierSet.hpp"
  28 #include "gc/g1/g1BarrierSetAssembler.hpp"
  29 #include "gc/g1/g1BarrierSetRuntime.hpp"
  30 #include "gc/g1/g1CardTable.hpp"
  31 #include "gc/g1/g1DirtyCardQueue.hpp"
  32 #include "gc/g1/g1SATBMarkQueueSet.hpp"
  33 #include "gc/g1/g1ThreadLocalData.hpp"
  34 #include "gc/g1/heapRegion.hpp"
  35 #include "interpreter/interp_masm.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "utilities/macros.hpp"
  38 #ifdef COMPILER1
  39 #include "c1/c1_LIRAssembler.hpp"
  40 #include "c1/c1_MacroAssembler.hpp"
  41 #include "gc/g1/c1/g1BarrierSetC1.hpp"
  42 #endif
  43 
  44 #define __ masm->
  45 
// Generate the G1 SATB pre-barrier for a bulk oop array write. If concurrent
// marking is active, call into the runtime so the previous values in the
// destination range can be enqueued in the SATB buffers.
//   addr  - register holding the destination start address
//   count - register holding the number of elements to be written
// Clobbers O5 (used as tmp); any global registers among addr/count are
// explicitly saved and restored across the runtime call.
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count) {
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  // With G1, don't generate the call if we statically know that the target is uninitialized
  // (an uninitialized destination holds no previous values to record).
  if (!dest_uninitialized) {
    Register tmp = O5;
    assert_different_registers(addr, count, tmp);
    Label filtered;
    // Load the per-thread "is marking active" flag from G2 (thread register);
    // the flag's width depends on the SATB queue layout.
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    } else {
      guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
    }
    // Is marking active? If not, skip the runtime call entirely.
    __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

    __ save_frame(0);
    // Save the necessary global regs... will be used after.
    if (addr->is_global()) {
      __ mov(addr, L0);
    }
    if (count->is_global()) {
      __ mov(count, L1);
    }
    __ mov(addr->after_save(), O0);
    // Get the count into O1 (done in the delay slot of the call below).
    address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry)
                                         : CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry);
    __ call(slowpath);
    __ delayed()->mov(count->after_save(), O1);
    // Restore the saved globals, if any, then tear down the frame.
    if (addr->is_global()) {
      __ mov(L0, addr);
    }
    if (count->is_global()) {
      __ mov(L1, count);
    }
    __ restore();

    __ bind(filtered);
    DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp
  }
}
  90 
// Generate the G1 post-barrier for a bulk oop array write: call into the
// runtime to dirty and enqueue the cards covering [addr, addr + count).
//   addr  - register holding the destination start address
//   count - register holding the number of elements written
// A fresh register window is created around the call, so the caller's
// registers survive; tmp is unused on this platform.
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  // Get some new fresh output registers.
  __ save_frame(0);
  __ mov(addr->after_save(), O0);
  __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry));
  __ delayed()->mov(count->after_save(), O1);  // count is passed in the delay slot
  __ restore();
}
 100 
 101 #undef __
 102 
// Start/end addresses of the generated SATB enqueue stubs (end pointers are
// kept for bounds checking of the generated code). Two flavors exist: one
// that builds its own register window, and a frameless one that runs in the
// caller's window. Filled in by generate_satb_log_enqueue() below.
static address satb_log_enqueue_with_frame = NULL;
static u_char* satb_log_enqueue_with_frame_end = NULL;

static address satb_log_enqueue_frameless = NULL;
static u_char* satb_log_enqueue_frameless_end = NULL;

// Buffer size (bytes) reserved for each generated enqueue stub; debug builds
// emit extra code, so allow more room there.
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
 110 
// Generate the SATB enqueue stub: append the pre-barrier value (passed in
// O0 by the caller) to the thread-local SATB buffer, calling into the
// runtime to refill when the buffer index reaches zero.
//   with_frame - if true, the stub builds its own register window (the
//                argument then appears as I0) and returns via ret/restore;
//                otherwise it runs frameless and returns via retl.
// Publishes the generated code's start/end in the satb_log_enqueue_*
// statics above.
static void generate_satb_log_enqueue(bool with_frame) {
  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);

#define __ masm.

  address start = __ pc();
  Register pre_val;

  Label refill, restart;
  if (with_frame) {
    __ save_frame(0);
    pre_val = I0;  // Was O0 before the save.
  } else {
    pre_val = O0;
  }

  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
         in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
         "check sizes in assembly below");

  __ bind(restart);

  // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
  // so ld_ptr is appropriate.
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);

  // index == 0? Then the buffer is full and must be refilled first.
  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);  // the queue index counts down by one slot

  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
  if (!with_frame) {
    // Use return-from-leaf; the index write-back rides in the delay slot.
    __ retl();
    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  } else {
    // Not delayed.
    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  }
  if (with_frame) {
    __ ret();
    __ delayed()->restore();
  }
  __ bind(refill);

  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &G1SATBMarkQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  __ mov(G1_scratch, L0);
  __ mov(G3_scratch, L1);
  __ mov(G4, L2);
  // We need the value of O0 above (for the write into the buffer), so we
  // save and restore it.
  __ mov(O0, L3);
  // Since the call will overwrite O7, we save and restore that, as well.
  __ mov(O7, L4);
  __ call_VM_leaf(L5, handle_zero, G2_thread);
  // Restore the saved scratch registers and retry the enqueue.
  __ mov(L0, G1_scratch);
  __ mov(L1, G3_scratch);
  __ mov(L2, G4);
  __ mov(L3, O0);
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ delayed()->mov(L4, O7);

  // Publish the generated stub's bounds for the flavor we just built.
  if (with_frame) {
    satb_log_enqueue_with_frame = start;
    satb_log_enqueue_with_frame_end = __ pc();
  } else {
    satb_log_enqueue_frameless = start;
    satb_log_enqueue_frameless_end = __ pc();
  }

#undef __
}
 194 
 195 #define __ masm->
 196 
// Generate the G1 SATB pre-barrier. When marking is active, record the
// previous value of the store location in the SATB buffer by calling one of
// the enqueue stubs generated above.
//   obj/index/offset - address of the field being stored to; when obj is a
//                      real register the previous value is loaded from it
//                      (offset and index are mutually exclusive).
//   pre_val          - previous value, supplied directly (only valid when
//                      obj == noreg); must be a global or out register.
//   tmp              - scratch register; clobbered.
//   preserve_o_regs  - if true, protect the caller's O-registers by making
//                      the stub call under a fresh frame.
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register index,
                                                 int offset,
                                                 Register pre_val,
                                                 Register tmp,
                                                 bool preserve_o_regs) {
  Label filtered;

  if (obj == noreg) {
    // We are not loading the previous value so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    assert_different_registers(pre_val, tmp);
  } else {
    // We will be loading the previous value
    // in this code so...
    assert(offset == 0 || index == noreg, "choose one");
    assert(pre_val == noreg, "check this code");
  }

  // Load the per-thread "is marking active" flag; its width depends on the
  // SATB queue layout.
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  } else {
    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  }

  // Is marking active? If not, the barrier is a no-op.
  __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

  // Do we need to load the previous value?
  if (obj != noreg) {
    // Load the previous value...
    if (index == noreg) {
      if (Assembler::is_simm13(offset)) {
        // Offset fits in a 13-bit immediate; address directly.
        __ load_heap_oop(obj, offset, tmp);
      } else {
        // Materialize the large offset in tmp first.
        __ set(offset, tmp);
        __ load_heap_oop(obj, tmp, tmp);
      }
    } else {
      __ load_heap_oop(obj, index, tmp);
    }
    // Previous value has been loaded into tmp
    pre_val = tmp;
  }

  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null? Null previous values need not be recorded.
  __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);

  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  // case, pre_val will be a scratch G-reg, but there are some cases in
  // which it's an O-reg.  In the first case, do a normal call.  In the
  // latter, do a save here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");

  if (pre_val->is_global() && !preserve_o_regs) {
    // Framed stub: argument moved into O0 in the delay slot.
    __ call(satb_log_enqueue_with_frame);
    __ delayed()->mov(pre_val, O0);
  } else {
    // Make a frame here so the frameless stub can't disturb the caller's
    // O-registers; after_save() maps pre_val into the new window.
    __ save_frame(0);
    __ call(satb_log_enqueue_frameless);
    __ delayed()->mov(pre_val->after_save(), O0);
    __ restore();
  }

  __ bind(filtered);
}
 271 
 272 #undef __
 273 
// Start/end addresses of the generated dirty-card enqueue stub (end pointer
// kept for bounds checking). Filled in by generate_dirty_card_log_enqueue().
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
 276 
// Generate the dirty-card enqueue stub. Assumes O0 contains the store
// address on entry. Computes the card for that address; if the card is
// neither young nor already dirty, dirties it and appends the card address
// to the thread-local dirty card queue, refilling via the runtime when the
// queue is full. Publishes the stub's bounds in the statics above.
//   byte_map_base - base of the card table's byte map, baked into the stub.
static void generate_dirty_card_log_enqueue(CardTable::CardValue* byte_map_base) {
  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);
#define __ masm.
  address start = __ pc();

  Label not_already_dirty, restart, refill, young_card;

  // O0 := card index for the store address.
  __ srlx(O0, CardTable::card_shift, O0);
  AddressLiteral addrlit(byte_map_base);
  __ set(addrlit, O1); // O1 := <card table base>
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  // Young cards never need dirtying; return early.
  __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad fence, then re-read the card to close the race with a
  // concurrent refinement thread cleaning the card.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(O0, O1, O2); // O2 := [O0 + O1]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get O0 + O1 into a reg by itself
  __ add(O0, O1, O3);

  // First, dirty it.
  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
  __ bind(restart);

  // Load the index into the update buffer. G1DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);

  // index == 0? Then the queue is full and must be refilled first.
  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);  // the queue index counts down by one slot

  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (the card address)
  // Use return-from-leaf; the index write-back rides in the delay slot.
  __ retl();
  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &G1DirtyCardQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  __ mov(G1_scratch, L3);
  __ mov(G3_scratch, L5);
  // We need the value of O3 above (for the write into the buffer), so we
  // save and restore it.
  __ mov(O3, L6);
  // Since the call will overwrite O7, we save and restore that, as well.
  __ mov(O7, L4);

  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
  // Restore the saved scratch registers and retry the enqueue.
  __ mov(L3, G1_scratch);
  __ mov(L5, G3_scratch);
  __ mov(L6, O3);
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ delayed()->mov(L4, O7);

  dirty_card_log_enqueue = start;
  dirty_card_log_enqueue_end = __ pc();
  // XXX Should have a guarantee here about not going off the end!
  // Does it already do so?  Do an experiment...

#undef __

}
 363 
 364 #define __ masm->
 365 
// Generate the G1 post-barrier for a single oop store: if store_addr and
// new_val are in different heap regions, call the dirty-card enqueue stub
// to record the card. Stores of G0 (null) emit no barrier at all.
//   store_addr - exact address stored to; passed to the stub in O0.
//   new_val    - uncompressed new value (for the region cross check).
//   tmp        - scratch register; clobbered.
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
  Label filtered;
  MacroAssembler* post_filter_masm = masm;

  if (new_val == G0) return;

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  // tmp := (store_addr ^ new_val) >> LogOfHRGrainBytes; zero iff both
  // addresses lie in the same heap region, in which case no card is needed.
  __ xor3(store_addr, new_val, tmp);
  __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);

  __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);

  // If the "store_addr" register is an "in" or "local" register, move it to
  // a scratch reg so we can pass it as an argument.
  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  // Pick a scratch register different from "tmp".
  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
  // Make sure we use up the delay slot!
  if (use_scr) {
    post_filter_masm->mov(store_addr, scr);
  } else {
    post_filter_masm->nop();
  }
  // Call the generated stub under a fresh frame; the argument move rides
  // in the call's delay slot.
  __ save_frame(0);
  __ call(dirty_card_log_enqueue);
  if (use_scr) {
    __ delayed()->mov(scr, O0);
  } else {
    __ delayed()->mov(store_addr->after_save(), O0);
  }
  __ restore();

  __ bind(filtered);
}
 401 
// Emit a G1 oop store: pre-barrier (record previous value), the store
// itself, then the post-barrier (card marking). Decorators control which
// barriers are needed; IS_DEST_UNINITIALIZED is not supported here.
//   val - value to store (G0 means storing null: no post barrier).
//   dst - destination address (base + either index register or
//         displacement).
//   tmp - scratch register; clobbered.
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Register val, Address dst, Register tmp) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");

  bool needs_pre_barrier = as_normal;
  // No need for post barrier if storing NULL
  bool needs_post_barrier = val != G0 && in_heap;

  bool is_array = (decorators & IS_ARRAY) != 0;
  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
  // Precise card marking (card of the exact slot) is required for arrays
  // and unknown-oop-ref stores; otherwise the object start's card is used.
  bool precise = is_array || on_anonymous;

  Register index = dst.has_index() ? dst.index() : noreg;
  int disp = dst.has_disp() ? dst.disp() : 0;

  if (needs_pre_barrier) {
    // Load and record the previous value.
    g1_write_barrier_pre(masm, dst.base(), index, disp,
                         noreg /* pre_val */,
                         tmp, true /*preserve_o_regs*/);
  }

  Register new_val = val;
  if (needs_post_barrier) {
    // G1 barrier needs uncompressed oop for region cross check.
    if (UseCompressedOops && val != G0) {
      // NOTE(review): new_val aliases tmp from here on; this assumes
      // store_at below does not clobber tmp before the post barrier reads
      // it -- confirm against BarrierSetAssembler::store_at.
      new_val = tmp;
      __ mov(val, new_val);
    }
  }

  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);

  if (needs_post_barrier) {
    Register base = dst.base();
    if (precise) {
      // Fold the index/displacement into base so the barrier sees the
      // exact slot address.
      if (!dst.has_index()) {
        __ add(base, disp, base);
      } else {
        assert(!dst.has_disp(), "not supported yet");
        __ add(base, index, base);
      }
    }
    g1_write_barrier_post(masm, base, new_val, tmp);
  }
}
 450 
// Emit a G1-aware oop load. For Reference.get-style loads (weak/phantom
// oop refs), the loaded referent must additionally be recorded in the SATB
// buffer so concurrent marking does not lose it.
//   src - address to load from.
//   dst - destination register; also the pre-barrier's pre_val.
//   tmp - scratch register; clobbered when the pre-barrier runs.
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Address src, Register dst, Register tmp) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  // Load the value of the referent field.
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer. Note with
    // these parameters the pre-barrier does not generate
    // the load of the previous value

    Register pre_val = dst;
    bool saved = false;
    if (pre_val->is_in()) {
      // The g1_write_barrier_pre method assumes that the pre_val
      // is not in an input register.
      __ save_frame_and_mov(0, pre_val, O0);
      pre_val = O0;
      saved = true;
    }

    g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
                         pre_val /* pre_val */,
                         tmp /* tmp */,
                         true /* preserve_o_regs */);

    // Tear down the temporary frame created above, if any.
    if (saved) {
      __ restore();
    }
  }
}
 485 
 486 void G1BarrierSetAssembler::barrier_stubs_init() {
 487   if (dirty_card_log_enqueue == 0) {
 488     G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 489     CardTable *ct = bs->card_table();
 490     generate_dirty_card_log_enqueue(ct->byte_map_base());
 491     assert(dirty_card_log_enqueue != 0, "postcondition.");
 492   }
 493   if (satb_log_enqueue_with_frame == 0) {
 494     generate_satb_log_enqueue(true);
 495     assert(satb_log_enqueue_with_frame != 0, "postcondition.");
 496   }
 497   if (satb_log_enqueue_frameless == 0) {
 498     generate_satb_log_enqueue(false);
 499     assert(satb_log_enqueue_frameless != 0, "postcondition.");
 500   }
 501 }
 502 
 503 #ifdef COMPILER1
 504 
 505 #undef __
 506 #define __ ce->masm()->
 507 
// C1 slow-path stub for the G1 pre-barrier. Entered from compiled code when
// marking is known to be active: optionally loads the previous value, skips
// the runtime call if it is null, otherwise jumps into the shared pre-barrier
// runtime blob (which expects pre_val in G4) and resumes at the stub's
// continuation.
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");
  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  // Null previous values need not be recorded; choose the branch form based
  // on whether the continuation is within 16-bit displacement range.
  if (__ is_in_wdisp16_range(*stub->continuation())) {
    __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
  } else {
    __ cmp(pre_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
  }
  __ delayed()->nop();

  // Call the shared runtime blob; pre_val is passed in G4 via the delay slot.
  __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(pre_val_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
  __ delayed()->nop();
}
 537 
// C1 slow-path stub for the G1 post-barrier. Skips the runtime call when the
// new value is null, otherwise jumps into the shared post-barrier runtime
// blob (which expects the card address source in G4) and resumes at the
// stub's continuation.
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register addr_reg = stub->addr()->as_pointer_register();
  Register new_val_reg = stub->new_val()->as_register();

  // Null stores need no card marking; choose the branch form based on
  // whether the continuation is within 16-bit displacement range.
  if (__ is_in_wdisp16_range(*stub->continuation())) {
    __ br_null(new_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
  } else {
    __ cmp(new_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
  }
  __ delayed()->nop();

  // Call the shared runtime blob; the store address is passed in G4 via the
  // delay slot.
  __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin());
  __ delayed()->mov(addr_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
  __ delayed()->nop();
}
 560 
 561 #undef __
 562 #define __ sasm->
 563 
// Generate the shared C1 pre-barrier runtime stub: enqueue the previous
// value (passed in G4) into the thread-local SATB buffer, re-checking that
// marking is still active, and calling into the VM to refill the buffer
// when it is full. Clobbers G1_scratch and G3_scratch.
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // G4: previous value of memory

  Register pre_val = G4;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;

  Label refill, restart;
  int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
  int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
  int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ld(G2_thread, satb_q_active_byte_offset, tmp);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldsb(G2_thread, satb_q_active_byte_offset, tmp);
  }
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart);
  // Marking has been turned off since the stub call was emitted: nothing to do.
  __ retl();
  __ delayed()->nop();

  __ bind(restart);
  // Load the index into the SATB buffer. SATBMarkQueue::_index is a
  // size_t so ld_ptr is appropriate
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);

  // index == 0? Then the buffer is full and must be refilled first.
  __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);

  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
  __ sub(tmp, oopSize, tmp);  // the queue index counts down by one slot

  __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := pre_val
  // Use return-from-leaf; the index write-back rides in the delay slot.
  __ retl();
  __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);

  __ bind(refill);

  // Rare path: preserve all live registers around the VM call that hands
  // the full buffer to the SATB queue set.
  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   G1SATBMarkQueueSet::handle_zero_index_for_thread),
                  G2_thread);

  __ restore_live_registers(true);

  // Retry the enqueue with the freshly-installed buffer.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
 619 
// Generate the shared C1 post-barrier runtime stub: compute the card for
// the store address (passed in G4); if the card is neither young nor
// already dirty, dirty it and enqueue the card address into the
// thread-local dirty card queue, calling into the VM to refill when the
// queue is full. Clobbers G1_scratch, G3_scratch, and G5.
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());

  Register addr = G4;
  Register cardtable = G5;
  Register tmp  = G1_scratch;
  Register tmp2 = G3_scratch;
  CardTable::CardValue* byte_map_base = bs->card_table()->byte_map_base();

  Label not_already_dirty, restart, refill, young_card;

  // addr := card index for the store address (64-bit shift on LP64).
#ifdef _LP64
  __ srlx(addr, CardTable::card_shift, addr);
#else
  __ srl(addr, CardTable::card_shift, addr);
#endif

  AddressLiteral rs((address)byte_map_base);
  __ set(rs, cardtable);         // cardtable := <card table base>
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  // Young cards never need dirtying; return early.
  __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);

  // StoreLoad fence, then re-read the card to close the race with a
  // concurrent refinement thread cleaning the card.
  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

  assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
  __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);

  __ bind(young_card);
  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  __ retl();
  __ delayed()->nop();

  // Not dirty.
  __ bind(not_already_dirty);

  // Get cardtable + tmp into a reg by itself
  __ add(addr, cardtable, tmp2);

  // First, dirty it.
  __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).

  Register tmp3 = cardtable;
  Register tmp4 = tmp;

  // these registers are now dead
  addr = cardtable = tmp = noreg;

  int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
  int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());

  __ bind(restart);

  // Get the index into the update buffer. G1DirtyCardQueue::_index is
  // a size_t so ld_ptr is appropriate here.
  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);

  // index == 0? Then the queue is full and must be refilled first.
  __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);

  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
  __ sub(tmp3, oopSize, tmp3);  // the queue index counts down by one slot

  __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
  // Use return-from-leaf; the index write-back rides in the delay slot.
  __ retl();
  __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);

  __ bind(refill);

  // Rare path: preserve all live registers around the VM call that hands
  // the full queue buffer to the dirty card queue set.
  __ save_live_registers_no_oop_map(true);

  __ call_VM_leaf(L7_thread_cache,
                  CAST_FROM_FN_PTR(address,
                                   G1DirtyCardQueueSet::handle_zero_index_for_thread),
                  G2_thread);

  __ restore_live_registers(true);

  // Retry the enqueue with the freshly-installed buffer.
  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  __ epilogue();
}
 706 
 707 #undef __
 708 
 709 #endif // COMPILER1