/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

#define __ masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

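// Arraycopy pre-barrier: unless the destination is known to be uninitialized,
// call into the runtime so that the oops about to be overwritten in the
// destination range are recorded for SATB marking. The first callee_saved_regs
// registers are preserved across the call.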
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count, int callee_saved_regs) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  if (!dest_uninitialized) {
    assert( addr->encoding() < callee_saved_regs, "addr must be saved");
    assert(count->encoding() < callee_saved_regs, "count must be saved");

    BLOCK_COMMENT("PreBarrier");

#ifdef AARCH64
    callee_saved_regs = align_up(callee_saved_regs, 2);
    for (int i = 0; i < callee_saved_regs; i += 2) {
      __ raw_push(as_Register(i), as_Register(i+1));
    }
#else
    RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
    __ push(saved_regs | R9ifScratched);
#endif // AARCH64

    if (addr != R0) {
      assert_different_registers(count, R0);
      __ mov(R0, addr);
    }
#ifdef AARCH64
    __ zero_extend(R1, count, 32); // G1BarrierSet::write_ref_array_pre_*_entry takes size_t
#else
    if (count != R1) {
      __ mov(R1, count);
    }
#endif // AARCH64

    if (UseCompressedOops) {
      __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry));
    } else {
      __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry));
    }

#ifdef AARCH64
    for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
      __ raw_pop(as_Register(i), as_Register(i+1));
    }
#else
    __ pop(saved_regs | R9ifScratched);
#endif // AARCH64
  }
}

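// Arraycopy post-barrier: after the copy, call into the runtime to mark the
// cards covering the destination range [addr, addr + count) as dirty.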
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {

  BLOCK_COMMENT("G1PostBarrier");
  if (addr != R0) {
    assert_different_registers(count, R0);
    __ mov(R0, addr);
  }
#ifdef AARCH64
  __ zero_extend(R1, count, 32); // G1BarrierSet::write_ref_array_post_entry takes size_t
#else
  if (count != R1) {
    __ mov(R1, count);
  }
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not in
  // general worth optimizing for the few platforms where R9
  // is scratched. Note that the optimization might not be too
  // difficult for this particular call site.
  __ push(R9);
#endif // !R9_IS_SCRATCHED
#endif // !AARCH64
  __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry));
#ifndef AARCH64
#if R9_IS_SCRATCHED
  __ pop(R9);
#endif // !R9_IS_SCRATCHED
#endif // !AARCH64
}

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                          Register store_addr,
                                          Register new_val,
                                          Register pre_val,
                                          Register tmp1,
                                          Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert (new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  __ ldrb(tmp1, in_progress);
  __ cbz(tmp1, done);

  // Do we need to load the previous value?
  if (store_addr != noreg) {
    __ load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  __ cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ ldr(tmp1, index);           // tmp1 := *index_adr
  __ ldr(tmp2, buffer);

  __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  __ b(runtime, lt);             // If negative, goto runtime

  __ str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  __ str(pre_val, Address(tmp2, tmp1));
  __ b(done);

  __ bind(runtime);

  // save the live input values
#ifdef AARCH64
  if (store_addr != noreg) {
    __ raw_push(store_addr, new_val);
  } else {
    __ raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    __ push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    __ push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    __ mov(R0, pre_val);
  }
  __ mov(R1, Rthread);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    __ raw_pop(store_addr, new_val);
  } else {
    __ raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    __ pop(pre_val);
  }
#endif // AARCH64

  __ bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                           Register store_addr,
                                           Register new_val,
                                           Register tmp1,
                                           Register tmp2,
                                           Register tmp3) {

  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  BarrierSet* bs = BarrierSet::barrier_set();
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
  CardTable* ct = ctbs->card_table();
  Label done;
  Label runtime;

  // Does store cross heap regions?

  __ eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  __ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  __ cbz(tmp1, done);
#else
  __ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  __ b(done, eq);
#endif

  // crosses regions, storing NULL?

  __ cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");

  __ mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
  __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));

  __ ldrb(tmp2, Address(card_addr));
  __ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
  __ b(done, eq);

  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  __ ldrb(tmp2, Address(card_addr));
  __ cbz(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  __ strb(__ zero_register(tmp2), Address(card_addr));

  __ ldr(tmp2, queue_index);
  __ ldr(tmp3, buffer);

  __ subs(tmp2, tmp2, wordSize);
  __ b(runtime, lt); // go to runtime if now negative

  __ str(tmp2, queue_index);

  __ str(card_addr, Address(tmp3, tmp2));
  __ b(done);

  __ bind(runtime);

  if (card_addr != R0) {
    __ mov(R0, card_addr);
  }
  __ mov(R1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  __ bind(done);
}

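// Oop loads through Reference fields (ON_WEAK_OOP_REF / ON_PHANTOM_OOP_REF)
// must keep the referent alive for SATB marking, so the loaded value is run
// through the pre-barrier as if it were a previous value about to be
// overwritten.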
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;

  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2, tmp3);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2);
  }
}


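// Oop store into the heap (or a concurrent root): flattens the destination
// address into a single register, emits the SATB pre-barrier, performs the
// store, and, for non-null heap stores, emits the card-marking post-barrier.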
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  bool needs_post_barrier = (new_val != noreg) && in_heap;

  // flatten object address if needed
  assert (obj.mode() == basic_offset, "pre- or post-indexing is not supported here");

  const Register store_addr = obj.base();
  if (obj.index() != noreg) {
    assert (obj.disp() == 0, "index or displacement, not both");
#ifdef AARCH64
    __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
#else
    assert(obj.offset_op() == add_offset, "addition is expected");
    __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
#endif // AARCH64
  } else if (obj.disp() != 0) {
    __ add(store_addr, obj.base(), obj.disp());
  }

  if (needs_pre_barrier) {
    g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3);
  }

  if (is_null) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), new_val, tmp1, tmp2, tmp3, true);
  } else {
    // G1 barrier needs uncompressed oop for region cross check.
    Register val_to_store = new_val;
    if (UseCompressedOops) {
      val_to_store = tmp1;
      __ mov(val_to_store, new_val);
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), val_to_store, tmp1, tmp2, tmp3, false);
    if (needs_post_barrier) {
      g1_write_barrier_post(masm, store_addr, new_val, tmp1, tmp2, tmp3);
    }
  }
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

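// C1 slow path of the pre-barrier: marking is known to be in progress here;
// loads pre_val if the fast path did not, and passes it to the shared
// pre-barrier runtime stub through the reserved argument area on the stack.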
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cbz(pre_val_reg, *stub->continuation());
  ce->verify_reserved_argument_area_size(1);
  __ str(pre_val_reg, Address(SP));
  __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);

  __ b(*stub->continuation());
}

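// C1 slow path of the post-barrier: skips null stores and passes the store
// address to the shared post-barrier runtime stub through the reserved
// argument area on the stack.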
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());
  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register new_val_reg = stub->new_val()->as_register();
  __ cbz(new_val_reg, *stub->continuation());
  ce->verify_reserved_argument_area_size(1);
  __ str(stub->addr()->as_pointer_register(), Address(SP));
  __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);
  __ b(*stub->continuation());
}

#undef __
#define __ sasm->

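// Runtime stub behind the C1 pre-barrier slow path: re-checks that marking is
// still active, then enqueues the pre_val found on the stack into the current
// thread's SATB buffer, calling into the VM (g1_wb_pre) only when the buffer
// is full.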
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  // Input:
  // - pre_val pushed on the stack

  __ set_info("g1_pre_barrier_slow_id", false);

  // save at least the registers that need saving if the runtime is called
#ifdef AARCH64
  __ raw_push(R0, R1);
  __ raw_push(R2, R3);
  const int nb_saved_regs = 4;
#else // AARCH64
  const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
  const int nb_saved_regs = 6;
  assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
  __ push(saved_regs);
#endif // AARCH64

  const Register r_pre_val_0  = R0; // must be R0, to be ready for the runtime call
  const Register r_index_1    = R1;
  const Register r_buffer_2   = R2;

  Address queue_active(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
  __ ldrb(R1, queue_active);
  __ cbz(R1, done);

  __ ldr(r_index_1, queue_index);
  __ ldr(r_pre_val_0, Address(SP, nb_saved_regs*wordSize));
  __ ldr(r_buffer_2, buffer);

  __ subs(r_index_1, r_index_1, wordSize);
  __ b(runtime, lt);

  __ str(r_index_1, queue_index);
  __ str(r_pre_val_0, Address(r_buffer_2, r_index_1));

  __ bind(done);

#ifdef AARCH64
  __ raw_pop(R2, R3);
  __ raw_pop(R0, R1);
#else // AARCH64
  __ pop(saved_regs);
#endif // AARCH64

  __ ret();

  __ bind(runtime);

  __ save_live_registers();

  assert(r_pre_val_0 == c_rarg0, "pre_val should be in R0");
  __ mov(c_rarg1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, c_rarg1);

  __ restore_live_registers_without_return();

  __ b(done);
}

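// Runtime stub behind the C1 post-barrier slow path: computes the card for the
// store address found on the stack, skips young and already-dirty cards (with
// a StoreLoad barrier in between the two checks), dirties the card, and
// enqueues it into the current thread's dirty card buffer, calling into the VM
// (g1_wb_post) only when the buffer is full.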
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  // Input:
  // - store_addr, pushed on the stack

  __ set_info("g1_post_barrier_slow_id", false);

  Label done;
  Label recheck;
  Label runtime;

  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);

  // save at least the registers that need saving if the runtime is called
#ifdef AARCH64
  __ raw_push(R0, R1);
  __ raw_push(R2, R3);
  const int nb_saved_regs = 4;
#else // AARCH64
  const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
  const int nb_saved_regs = 6;
  assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
  __ push(saved_regs);
#endif // AARCH64

  const Register r_card_addr_0 = R0; // must be R0 for the slow case
  const Register r_obj_0 = R0;
  const Register r_card_base_1 = R1;
  const Register r_tmp2 = R2;
  const Register r_index_2 = R2;
  const Register r_buffer_3 = R3;
  const Register tmp1 = Rtemp;

  __ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
  // Note: there is a comment in x86 code about not using
  // ExternalAddress / lea, due to relocation not working
  // properly for that address. Should be OK for arm, where we
  // explicitly specify that 'cardtable' has a relocInfo::none
  // type.
  __ lea(r_card_base_1, cardtable);
  __ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTable::card_shift));

  // first quick check without barrier
  __ ldrb(r_tmp2, Address(r_card_addr_0));

  __ cmp(r_tmp2, (int)G1CardTable::g1_young_card_val());
  __ b(recheck, ne);

  __ bind(done);

#ifdef AARCH64
  __ raw_pop(R2, R3);
  __ raw_pop(R0, R1);
#else // AARCH64
  __ pop(saved_regs);
#endif // AARCH64

  __ ret();

  __ bind(recheck);

  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);

  // reload card state after the barrier that ensures the stored oop was visible
  __ ldrb(r_tmp2, Address(r_card_addr_0));

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  __ cbz(r_tmp2, done);

  // storing region crossing non-NULL, card is clean.
  // dirty card and log.

  assert(0 == (int)CardTable::dirty_card_val(), "adjust this code");
  if ((ci_card_table_address_as<intptr_t>() & 0xff) == 0) {
    // Card table is aligned so the lowest byte of the table address base is zero.
    __ strb(r_card_base_1, Address(r_card_addr_0));
  } else {
    __ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
  }

  __ ldr(r_index_2, queue_index);
  __ ldr(r_buffer_3, buffer);

  __ subs(r_index_2, r_index_2, wordSize);
  __ b(runtime, lt); // go to runtime if now negative

  __ str(r_index_2, queue_index);

  __ str(r_card_addr_0, Address(r_buffer_3, r_index_2));

  __ b(done);

  __ bind(runtime);

  __ save_live_registers();

  assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
  __ mov(c_rarg1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), c_rarg0, c_rarg1);

  __ restore_live_registers_without_return();

  __ b(done);
}

#undef __

#endif // COMPILER1