1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "gc/g1/g1BarrierSet.hpp"
  28 #include "gc/g1/g1BarrierSetAssembler.hpp"
  29 #include "gc/g1/g1BarrierSetRuntime.hpp"
  30 #include "gc/g1/g1CardTable.hpp"
  31 #include "gc/g1/g1SATBMarkQueueSet.hpp"
  32 #include "gc/g1/g1ThreadLocalData.hpp"
  33 #include "gc/g1/heapRegion.hpp"
  34 #include "interpreter/interp_masm.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "utilities/macros.hpp"
  37 #ifdef COMPILER1
  38 #include "c1/c1_LIRAssembler.hpp"
  39 #include "c1/c1_MacroAssembler.hpp"
  40 #include "gc/g1/c1/g1BarrierSetC1.hpp"
  41 #endif
  42 
  43 #define __ masm->
  44 
  45 void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
  46                                                             Register addr, Register count) {
  47   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  48   // With G1, don't generate the call if we statically know that the target in uninitialized
  49   if (!dest_uninitialized) {
  50     Register tmp = O5;
  51     assert_different_registers(addr, count, tmp);
  52     Label filtered;
  53     // Is marking active?
  54     if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
  55       __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  56     } else {
  57       guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
  58       __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
  59     }
  60     // Is marking active?
  61     __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
  62 
  63     __ save_frame(0);
  64     // Save the necessary global regs... will be used after.
  65     if (addr->is_global()) {
  66       __ mov(addr, L0);
  67     }
  68     if (count->is_global()) {
  69       __ mov(count, L1);
  70     }
  71     __ mov(addr->after_save(), O0);
  72     // Get the count into O1
  73     address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry)
  74                                          : CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry);
  75     __ call(slowpath);
  76     __ delayed()->mov(count->after_save(), O1);
  77     if (addr->is_global()) {
  78       __ mov(L0, addr);
  79     }
  80     if (count->is_global()) {
  81       __ mov(L1, count);
  82     }
  83     __ restore();
  84 
  85     __ bind(filtered);
  86     DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp
  87   }
  88 }
  89 
  90 void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
  91                                                              Register addr, Register count, Register tmp) {
  92   // Get some new fresh output registers.
  93   __ save_frame(0);
  94   __ mov(addr->after_save(), O0);
  95   __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry));
  96   __ delayed()->mov(count->after_save(), O1);
  97   __ restore();
  98 }
  99 
 100 #undef __
 101 
 102 static address satb_log_enqueue_with_frame = NULL;
 103 static u_char* satb_log_enqueue_with_frame_end = NULL;
 104 
 105 static address satb_log_enqueue_frameless = NULL;
 106 static u_char* satb_log_enqueue_frameless_end = NULL;
 107 
 108 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
 109 
 110 static void generate_satb_log_enqueue(bool with_frame) {
 111   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
 112   CodeBuffer buf(bb);
 113   MacroAssembler masm(&buf);
 114 
 115 #define __ masm.
 116 
 117   address start = __ pc();
 118   Register pre_val;
 119 
 120   Label refill, restart;
 121   if (with_frame) {
 122     __ save_frame(0);
 123     pre_val = I0;  // Was O0 before the save.
 124   } else {
 125     pre_val = O0;
 126   }
 127 
 128   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
 129   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
 130 
 131   assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
 132          in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
 133          "check sizes in assembly below");
 134 
 135   __ bind(restart);
 136 
 137   // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
 138   // so ld_ptr is appropriate.
 139   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
 140 
 141   // index == 0?
 142   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
 143 
 144   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
 145   __ sub(L0, oopSize, L0);
 146 
 147   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
 148   if (!with_frame) {
 149     // Use return-from-leaf
 150     __ retl();
 151     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
 152   } else {
 153     // Not delayed.
 154     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
 155   }
 156   if (with_frame) {
 157     __ ret();
 158     __ delayed()->restore();
 159   }
 160   __ bind(refill);
 161 
 162   address handle_zero =
 163     CAST_FROM_FN_PTR(address,
 164                      &G1SATBMarkQueueSet::handle_zero_index_for_thread);
 165   // This should be rare enough that we can afford to save all the
 166   // scratch registers that the calling context might be using.
 167   __ mov(G1_scratch, L0);
 168   __ mov(G3_scratch, L1);
 169   __ mov(G4, L2);
 170   // We need the value of O0 above (for the write into the buffer), so we
 171   // save and restore it.
 172   __ mov(O0, L3);
 173   // Since the call will overwrite O7, we save and restore that, as well.
 174   __ mov(O7, L4);
 175   __ call_VM_leaf(L5, handle_zero, G2_thread);
 176   __ mov(L0, G1_scratch);
 177   __ mov(L1, G3_scratch);
 178   __ mov(L2, G4);
 179   __ mov(L3, O0);
 180   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
 181   __ delayed()->mov(L4, O7);
 182 
 183   if (with_frame) {
 184     satb_log_enqueue_with_frame = start;
 185     satb_log_enqueue_with_frame_end = __ pc();
 186   } else {
 187     satb_log_enqueue_frameless = start;
 188     satb_log_enqueue_frameless_end = __ pc();
 189   }
 190 
 191 #undef __
 192 }
 193 
 194 #define __ masm->
 195 
 196 void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
 197                                                  Register obj,
 198                                                  Register index,
 199                                                  int offset,
 200                                                  Register pre_val,
 201                                                  Register tmp,
 202                                                  bool preserve_o_regs) {
 203   Label filtered;
 204 
 205   if (obj == noreg) {
 206     // We are not loading the previous value so make
 207     // sure that we don't trash the value in pre_val
 208     // with the code below.
 209     assert_different_registers(pre_val, tmp);
 210   } else {
 211     // We will be loading the previous value
 212     // in this code so...
 213     assert(offset == 0 || index == noreg, "choose one");
 214     assert(pre_val == noreg, "check this code");
 215   }
 216 
 217   // Is marking active?
 218   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
 219     __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
 220   } else {
 221     guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
 222     __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp);
 223   }
 224 
 225   // Is marking active?
 226   __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 227 
 228   // Do we need to load the previous value?
 229   if (obj != noreg) {
 230     // Load the previous value...
 231     if (index == noreg) {
 232       if (Assembler::is_simm13(offset)) {
 233         __ load_heap_oop(obj, offset, tmp);
 234       } else {
 235         __ set(offset, tmp);
 236         __ load_heap_oop(obj, tmp, tmp);
 237       }
 238     } else {
 239       __ load_heap_oop(obj, index, tmp);
 240     }
 241     // Previous value has been loaded into tmp
 242     pre_val = tmp;
 243   }
 244 
 245   assert(pre_val != noreg, "must have a real register");
 246 
 247   // Is the previous value null?
 248   __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
 249 
 250   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
 251   // case, pre_val will be a scratch G-reg, but there are some cases in
 252   // which it's an O-reg.  In the first case, do a normal call.  In the
 253   // latter, do a save here and call the frameless version.
 254 
 255   guarantee(pre_val->is_global() || pre_val->is_out(),
 256             "Or we need to think harder.");
 257 
 258   if (pre_val->is_global() && !preserve_o_regs) {
 259     __ call(satb_log_enqueue_with_frame);
 260     __ delayed()->mov(pre_val, O0);
 261   } else {
 262     __ save_frame(0);
 263     __ call(satb_log_enqueue_frameless);
 264     __ delayed()->mov(pre_val->after_save(), O0);
 265     __ restore();
 266   }
 267 
 268   __ bind(filtered);
 269 }
 270 
 271 #undef __
 272 
 273 static address dirty_card_log_enqueue = 0;
 274 static u_char* dirty_card_log_enqueue_end = 0;
 275 
 276 // This gets to assume that o0 contains the object address.
 277 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
 278   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
 279   CodeBuffer buf(bb);
 280   MacroAssembler masm(&buf);
 281 #define __ masm.
 282   address start = __ pc();
 283 
 284   Label not_already_dirty, restart, refill, young_card;
 285 
 286   __ srlx(O0, CardTable::card_shift, O0);
 287   AddressLiteral addrlit(byte_map_base);
 288   __ set(addrlit, O1); // O1 := <card table base>
 289   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
 290 
 291   __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
 292 
 293   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
 294   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
 295 
 296   assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
 297   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 298 
 299   __ bind(young_card);
 300   // We didn't take the branch, so we're already dirty: return.
 301   // Use return-from-leaf
 302   __ retl();
 303   __ delayed()->nop();
 304 
 305   // Not dirty.
 306   __ bind(not_already_dirty);
 307 
 308   // Get O0 + O1 into a reg by itself
 309   __ add(O0, O1, O3);
 310 
 311   // First, dirty it.
 312   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
 313 
 314   int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
 315   int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
 316   __ bind(restart);
 317 
 318   // Load the index into the update buffer. DirtyCardQueue::_index is
 319   // a size_t so ld_ptr is appropriate here.
 320   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
 321 
 322   // index == 0?
 323   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
 324 
 325   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
 326   __ sub(L0, oopSize, L0);
 327 
 328   __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
 329   // Use return-from-leaf
 330   __ retl();
 331   __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
 332 
 333   __ bind(refill);
 334   address handle_zero =
 335     CAST_FROM_FN_PTR(address,
 336                      &DirtyCardQueueSet::handle_zero_index_for_thread);
 337   // This should be rare enough that we can afford to save all the
 338   // scratch registers that the calling context might be using.
 339   __ mov(G1_scratch, L3);
 340   __ mov(G3_scratch, L5);
 341   // We need the value of O3 above (for the write into the buffer), so we
 342   // save and restore it.
 343   __ mov(O3, L6);
 344   // Since the call will overwrite O7, we save and restore that, as well.
 345   __ mov(O7, L4);
 346 
 347   __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
 348   __ mov(L3, G1_scratch);
 349   __ mov(L5, G3_scratch);
 350   __ mov(L6, O3);
 351   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
 352   __ delayed()->mov(L4, O7);
 353 
 354   dirty_card_log_enqueue = start;
 355   dirty_card_log_enqueue_end = __ pc();
 356   // XXX Should have a guarantee here about not going off the end!
 357   // Does it already do so?  Do an experiment...
 358 
 359 #undef __
 360 
 361 }
 362 
 363 #define __ masm->
 364 
 365 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {
 366   Label filtered;
 367   MacroAssembler* post_filter_masm = masm;
 368 
 369   if (new_val == G0) return;
 370 
 371   G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 372 
 373   __ xor3(store_addr, new_val, tmp);
 374   __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
 375 
 376   __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
 377 
 378   // If the "store_addr" register is an "in" or "local" register, move it to
 379   // a scratch reg so we can pass it as an argument.
 380   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
 381   // Pick a scratch register different from "tmp".
 382   Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
 383   // Make sure we use up the delay slot!
 384   if (use_scr) {
 385     post_filter_masm->mov(store_addr, scr);
 386   } else {
 387     post_filter_masm->nop();
 388   }
 389   __ save_frame(0);
 390   __ call(dirty_card_log_enqueue);
 391   if (use_scr) {
 392     __ delayed()->mov(scr, O0);
 393   } else {
 394     __ delayed()->mov(store_addr->after_save(), O0);
 395   }
 396   __ restore();
 397 
 398   __ bind(filtered);
 399 }
 400 
 401 void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 402                                          Register val, Address dst, Register tmp) {
 403   bool in_heap = (decorators & IN_HEAP) != 0;
 404   bool as_normal = (decorators & AS_NORMAL) != 0;
 405   assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");
 406 
 407   bool needs_pre_barrier = as_normal;
 408   // No need for post barrier if storing NULL
 409   bool needs_post_barrier = val != G0 && in_heap;
 410 
 411   bool is_array = (decorators & IS_ARRAY) != 0;
 412   bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
 413   bool precise = is_array || on_anonymous;
 414 
 415   Register index = dst.has_index() ? dst.index() : noreg;
 416   int disp = dst.has_disp() ? dst.disp() : 0;
 417 
 418   if (needs_pre_barrier) {
 419     // Load and record the previous value.
 420     g1_write_barrier_pre(masm, dst.base(), index, disp,
 421                          noreg /* pre_val */,
 422                          tmp, true /*preserve_o_regs*/);
 423   }
 424 
 425   Register new_val = val;
 426   if (needs_post_barrier) {
 427     // G1 barrier needs uncompressed oop for region cross check.
 428     if (UseCompressedOops && val != G0) {
 429       new_val = tmp;
 430       __ mov(val, new_val);
 431     }
 432   }
 433 
 434   BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
 435 
 436   if (needs_post_barrier) {
 437     Register base = dst.base();
 438     if (precise) {
 439       if (!dst.has_index()) {
 440         __ add(base, disp, base);
 441       } else {
 442         assert(!dst.has_disp(), "not supported yet");
 443         __ add(base, index, base);
 444       }
 445     }
 446     g1_write_barrier_post(masm, base, new_val, tmp);
 447   }
 448 }
 449 
 450 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 451                                     Address src, Register dst, Register tmp) {
 452   bool on_oop = type == T_OBJECT || type == T_ARRAY;
 453   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
 454   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
 455   bool on_reference = on_weak || on_phantom;
 456   // Load the value of the referent field.
 457   ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp);
 458   if (on_oop && on_reference) {
 459     // Generate the G1 pre-barrier code to log the value of
 460     // the referent field in an SATB buffer. Note with
 461     // these parameters the pre-barrier does not generate
 462     // the load of the previous value
 463 
 464     Register pre_val = dst;
 465     bool saved = false;
 466     if (pre_val->is_in()) {
 467       // The g1_write_barrier_pre method assumes that the pre_val
 468       // is not in an input register.
 469       __ save_frame_and_mov(0, pre_val, O0);
 470       pre_val = O0;
 471       saved = true;
 472     }
 473 
 474     g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */,
 475                          pre_val /* pre_val */,
 476                          tmp /* tmp */,
 477                          true /* preserve_o_regs */);
 478 
 479     if (saved) {
 480       __ restore();
 481     }
 482   }
 483 }
 484 
 485 void G1BarrierSetAssembler::barrier_stubs_init() {
 486   if (dirty_card_log_enqueue == 0) {
 487     G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 488     CardTable *ct = bs->card_table();
 489     generate_dirty_card_log_enqueue(ct->byte_map_base());
 490     assert(dirty_card_log_enqueue != 0, "postcondition.");
 491   }
 492   if (satb_log_enqueue_with_frame == 0) {
 493     generate_satb_log_enqueue(true);
 494     assert(satb_log_enqueue_with_frame != 0, "postcondition.");
 495   }
 496   if (satb_log_enqueue_frameless == 0) {
 497     generate_satb_log_enqueue(false);
 498     assert(satb_log_enqueue_frameless != 0, "postcondition.");
 499   }
 500 }
 501 
 502 #ifdef COMPILER1
 503 
 504 #undef __
 505 #define __ ce->masm()->
 506 
 507 void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
 508   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 509   // At this point we know that marking is in progress.
 510   // If do_load() is true then we have to emit the
 511   // load of the previous value; otherwise it has already
 512   // been loaded into _pre_val.
 513 
 514   __ bind(*stub->entry());
 515 
 516   assert(stub->pre_val()->is_register(), "Precondition.");
 517   Register pre_val_reg = stub->pre_val()->as_register();
 518 
 519   if (stub->do_load()) {
 520     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
 521   }
 522 
 523   if (__ is_in_wdisp16_range(*stub->continuation())) {
 524     __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
 525   } else {
 526     __ cmp(pre_val_reg, G0);
 527     __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
 528   }
 529   __ delayed()->nop();
 530 
 531   __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin());
 532   __ delayed()->mov(pre_val_reg, G4);
 533   __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
 534   __ delayed()->nop();
 535 }
 536 
 537 void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
 538   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
 539   __ bind(*stub->entry());
 540 
 541   assert(stub->addr()->is_register(), "Precondition.");
 542   assert(stub->new_val()->is_register(), "Precondition.");
 543   Register addr_reg = stub->addr()->as_pointer_register();
 544   Register new_val_reg = stub->new_val()->as_register();
 545 
 546   if (__ is_in_wdisp16_range(*stub->continuation())) {
 547     __ br_null(new_val_reg, /*annul*/false, Assembler::pt, *stub->continuation());
 548   } else {
 549     __ cmp(new_val_reg, G0);
 550     __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation());
 551   }
 552   __ delayed()->nop();
 553 
 554   __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin());
 555   __ delayed()->mov(addr_reg, G4);
 556   __ br(Assembler::always, false, Assembler::pt, *stub->continuation());
 557   __ delayed()->nop();
 558 }
 559 
 560 #undef __
 561 #define __ sasm->
 562 
 563 void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
 564   __ prologue("g1_pre_barrier", false);
 565 
 566   // G4: previous value of memory
 567 
 568   Register pre_val = G4;
 569   Register tmp  = G1_scratch;
 570   Register tmp2 = G3_scratch;
 571 
 572   Label refill, restart;
 573   int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
 574   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
 575   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
 576 
 577   // Is marking still active?
 578   if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
 579     __ ld(G2_thread, satb_q_active_byte_offset, tmp);
 580   } else {
 581     assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
 582     __ ldsb(G2_thread, satb_q_active_byte_offset, tmp);
 583   }
 584   __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart);
 585   __ retl();
 586   __ delayed()->nop();
 587 
 588   __ bind(restart);
 589   // Load the index into the SATB buffer. SATBMarkQueue::_index is a
 590   // size_t so ld_ptr is appropriate
 591   __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
 592 
 593   // index == 0?
 594   __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill);
 595 
 596   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
 597   __ sub(tmp, oopSize, tmp);
 598 
 599   __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
 600   // Use return-from-leaf
 601   __ retl();
 602   __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
 603 
 604   __ bind(refill);
 605 
 606   __ save_live_registers_no_oop_map(true);
 607 
 608   __ call_VM_leaf(L7_thread_cache,
 609                   CAST_FROM_FN_PTR(address,
 610                                    G1SATBMarkQueueSet::handle_zero_index_for_thread),
 611                   G2_thread);
 612 
 613   __ restore_live_registers(true);
 614 
 615   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
 616   __ epilogue();
 617 }
 618 
 619 void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
 620   __ prologue("g1_post_barrier", false);
 621 
 622   G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
 623 
 624   Register addr = G4;
 625   Register cardtable = G5;
 626   Register tmp  = G1_scratch;
 627   Register tmp2 = G3_scratch;
 628   jbyte* byte_map_base = bs->card_table()->byte_map_base();
 629 
 630   Label not_already_dirty, restart, refill, young_card;
 631 
 632 #ifdef _LP64
 633   __ srlx(addr, CardTable::card_shift, addr);
 634 #else
 635   __ srl(addr, CardTable::card_shift, addr);
 636 #endif
 637 
 638   AddressLiteral rs((address)byte_map_base);
 639   __ set(rs, cardtable);         // cardtable := <card table base>
 640   __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
 641 
 642   __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
 643 
 644   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
 645   __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
 646 
 647   assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code");
 648   __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 649 
 650   __ bind(young_card);
 651   // We didn't take the branch, so we're already dirty: return.
 652   // Use return-from-leaf
 653   __ retl();
 654   __ delayed()->nop();
 655 
 656   // Not dirty.
 657   __ bind(not_already_dirty);
 658 
 659   // Get cardtable + tmp into a reg by itself
 660   __ add(addr, cardtable, tmp2);
 661 
 662   // First, dirty it.
 663   __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).
 664 
 665   Register tmp3 = cardtable;
 666   Register tmp4 = tmp;
 667 
 668   // these registers are now dead
 669   addr = cardtable = tmp = noreg;
 670 
 671   int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
 672   int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
 673 
 674   __ bind(restart);
 675 
 676   // Get the index into the update buffer. DirtyCardQueue::_index is
 677   // a size_t so ld_ptr is appropriate here.
 678   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
 679 
 680   // index == 0?
 681   __ cmp_and_brx_short(tmp3, G0, Assembler::equal,  Assembler::pn, refill);
 682 
 683   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
 684   __ sub(tmp3, oopSize, tmp3);
 685 
 686   __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
 687   // Use return-from-leaf
 688   __ retl();
 689   __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
 690 
 691   __ bind(refill);
 692 
 693   __ save_live_registers_no_oop_map(true);
 694 
 695   __ call_VM_leaf(L7_thread_cache,
 696                   CAST_FROM_FN_PTR(address,
 697                                    DirtyCardQueueSet::handle_zero_index_for_thread),
 698                   G2_thread);
 699 
 700   __ restore_live_registers(true);
 701 
 702   __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
 703   __ epilogue();
 704 }
 705 
 706 #undef __
 707 
 708 #endif // COMPILER1