/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif

#define __ masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count, int callee_saved_regs) {
  bool dest_uninitialized = (decorators & AS_DEST_NOT_INITIALIZED) != 0;
  if (!dest_uninitialized) {
    assert( addr->encoding() < callee_saved_regs, "addr must be saved");
    assert(count->encoding() < callee_saved_regs, "count must be saved");

    BLOCK_COMMENT("PreBarrier");

#ifdef AARCH64
    callee_saved_regs = align_up(callee_saved_regs, 2);
    for (int i = 0; i < callee_saved_regs; i += 2) {
      __ raw_push(as_Register(i), as_Register(i+1));
    }
#else
    RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
    __ push(saved_regs | R9ifScratched);
#endif // AARCH64

    if (addr != R0) {
      assert_different_registers(count, R0);
      __ mov(R0, addr);
    }
#ifdef AARCH64
    __ zero_extend(R1, count, 32); // G1BarrierSet::write_ref_array_pre_*_entry takes size_t
#else
    if (count != R1) {
      __ mov(R1, count);
    }
#endif // AARCH64

    if (UseCompressedOops) {
      __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_narrow_oop_entry));
    } else {
      __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_pre_oop_entry));
    }

#ifdef AARCH64
    for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
      __ raw_pop(as_Register(i), as_Register(i+1));
    }
#else
    __ pop(saved_regs | R9ifScratched);
#endif // AARCH64
  }
}
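
// G1 post-barrier for an oop-array copy: records the written-to range by
// calling G1BarrierSet::write_ref_array_post_entry(addr, count). The array
// address and element count are moved into R0/R1 for the runtime call below.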
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {

  BLOCK_COMMENT("G1PostBarrier");
  if (addr != R0) {
    assert_different_registers(count, R0);
    __ mov(R0, addr);
  }
#ifdef AARCH64
  __ zero_extend(R1, count, 32); // G1BarrierSet::write_ref_array_post_entry takes size_t
#else
  if (count != R1) {
    __ mov(R1, count);
  }
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not in
  // general worth optimizing for the few platforms where R9
  // is scratched. Note that the optimization might not be too
  // difficult for this particular call site.
  __ push(R9);
#endif // R9_IS_SCRATCHED
#endif // AARCH64
  __ call(CAST_FROM_FN_PTR(address, G1BarrierSet::write_ref_array_post_entry));
#ifndef AARCH64
#if R9_IS_SCRATCHED
  __ pop(R9);
#endif // R9_IS_SCRATCHED
#endif // !AARCH64
}

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register store_addr,
                                                 Register new_val,
                                                 Register pre_val,
                                                 Register tmp1,
                                                 Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert (new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  __ ldrb(tmp1, in_progress);
  __ cbz(tmp1, done);

  // Do we need to load the previous value?
  if (store_addr != noreg) {
    __ load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  __ cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
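  //
  // The SATB buffer is filled from high addresses down: the index field is
  // the byte offset of the next free slot. A slot is claimed by decrementing
  // the index by wordSize; if the result is negative the buffer is full
  // (index was 0) and we take the runtime path instead.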

  __ ldr(tmp1, index);           // tmp1 := *index_adr
  __ ldr(tmp2, buffer);

  __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  __ b(runtime, lt);             // If negative, goto runtime

  __ str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  __ str(pre_val, Address(tmp2, tmp1));
  __ b(done);

  __ bind(runtime);

  // save the live input values
#ifdef AARCH64
  if (store_addr != noreg) {
    __ raw_push(store_addr, new_val);
  } else {
    __ raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    __ push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    __ push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    __ mov(R0, pre_val);
  }
  __ mov(R1, Rthread);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    __ raw_pop(store_addr, new_val);
  } else {
    __ raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    __ pop(pre_val);
  }
#endif // AARCH64

  __ bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register tmp1,
                                                  Register tmp2,
                                                  Register tmp3) {

  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  BarrierSet* bs = BarrierSet::barrier_set();
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
  CardTable* ct = ctbs->card_table();
  Label done;
  Label runtime;

  // Does store cross heap regions?

  __ eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  __ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  __ cbz(tmp1, done);
#else
  __ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  __ b(done, eq);
#endif

  // crosses regions, storing NULL?

  __ cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");

  __ mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
  __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));

  __ ldrb(tmp2, Address(card_addr));
  __ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
  __ b(done, eq);

  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  __ ldrb(tmp2, Address(card_addr));
  __ cbz(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
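  //
  // dirty_card_val() is asserted to be 0 above, so storing the zero
  // register marks the card dirty. The card address is then logged in the
  // thread-local dirty card queue, whose index counts down in bytes just
  // like the SATB queue; a full buffer sends us to the runtime path.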

  __ strb(__ zero_register(tmp2), Address(card_addr));

  __ ldr(tmp2, queue_index);
  __ ldr(tmp3, buffer);

  __ subs(tmp2, tmp2, wordSize);
  __ b(runtime, lt); // go to runtime if now negative

  __ str(tmp2, queue_index);

  __ str(card_addr, Address(tmp3, tmp2));
  __ b(done);

  __ bind(runtime);

  if (card_addr != R0) {
    __ mov(R0, card_addr);
  }
  __ mov(R1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  __ bind(done);
}

void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;

  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2, tmp3);
  if (on_oop && on_reference) {
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2);
  }
}

void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;

  bool needs_pre_barrier = in_heap || in_concurrent_root;
  bool needs_post_barrier = (new_val != noreg) && in_heap;

  // flatten object address if needed
  assert (obj.mode() == basic_offset, "pre- or post-indexing is not supported here");

  const Register store_addr = obj.base();
  if (obj.index() != noreg) {
    assert (obj.disp() == 0, "index or displacement, not both");
#ifdef AARCH64
    __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
#else
    assert(obj.offset_op() == add_offset, "addition is expected");
    __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
#endif // AARCH64
  } else if (obj.disp() != 0) {
    __ add(store_addr, obj.base(), obj.disp());
  }

  if (needs_pre_barrier) {
    g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3);
  }

  if (is_null) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), new_val, tmp1, tmp2, tmp3, true);
  } else {
    // G1 barrier needs uncompressed oop for region cross check.
    Register val_to_store = new_val;
    if (UseCompressedOops) {
      val_to_store = tmp1;
      __ mov(val_to_store, new_val);
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), val_to_store, tmp1, tmp2, tmp3, false);
    if (needs_post_barrier) {
      g1_write_barrier_post(masm, store_addr, new_val, tmp1, tmp2, tmp3);
    }
  }
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.
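  //
  // The slow-path blob takes its single argument on the stack: C1 keeps a
  // reserved argument area at the bottom of the frame (checked via
  // verify_reserved_argument_area_size below), so pre_val is stored at
  // [SP] before the call.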

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cbz(pre_val_reg, *stub->continuation());
  ce->verify_reserved_argument_area_size(1);
  __ str(pre_val_reg, Address(SP));
  __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);

  __ b(*stub->continuation());
}

void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());
  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register new_val_reg = stub->new_val()->as_register();
  __ cbz(new_val_reg, *stub->continuation());
  ce->verify_reserved_argument_area_size(1);
  __ str(stub->addr()->as_pointer_register(), Address(SP));
  __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);
  __ b(*stub->continuation());
}

#undef __
#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  // Input:
  // - pre_val pushed on the stack

  __ set_info("g1_pre_barrier_slow_id", false);

  // save at least the registers that need saving if the runtime is called
#ifdef AARCH64
  __ raw_push(R0, R1);
  __ raw_push(R2, R3);
  const int nb_saved_regs = 4;
#else // AARCH64
  const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
  const int nb_saved_regs = 6;
  assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
  __ push(saved_regs);
#endif // AARCH64

  const Register r_pre_val_0 = R0; // must be R0, to be ready for the runtime call
  const Register r_index_1 = R1;
  const Register r_buffer_2 = R2;

  Address queue_active(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
  __ ldrb(R1, queue_active);
  __ cbz(R1, done);

  __ ldr(r_index_1, queue_index);
  __ ldr(r_pre_val_0, Address(SP, nb_saved_regs*wordSize));
  __ ldr(r_buffer_2, buffer);

  __ subs(r_index_1, r_index_1, wordSize);
  __ b(runtime, lt);

  __ str(r_index_1, queue_index);
  __ str(r_pre_val_0, Address(r_buffer_2, r_index_1));

  __ bind(done);

#ifdef AARCH64
  __ raw_pop(R2, R3);
  __ raw_pop(R0, R1);
#else // AARCH64
  __ pop(saved_regs);
#endif // AARCH64

  __ ret();

  __ bind(runtime);

  __ save_live_registers();

  assert(r_pre_val_0 == c_rarg0, "pre_val should be in R0");
  __ mov(c_rarg1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, c_rarg1);

  __ restore_live_registers_without_return();

  __ b(done);
}

void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  // Input:
  // - store_addr, pushed on the stack

  __ set_info("g1_post_barrier_slow_id", false);

  Label done;
  Label recheck;
  Label runtime;

  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);

  // save at least the registers that need saving if the runtime is called
#ifdef AARCH64
  __ raw_push(R0, R1);
  __ raw_push(R2, R3);
  const int nb_saved_regs = 4;
#else // AARCH64
  const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
  const int nb_saved_regs = 6;
  assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
  __ push(saved_regs);
#endif // AARCH64

  const Register r_card_addr_0 = R0; // must be R0 for the slow case
  const Register r_obj_0 = R0;
  const Register r_card_base_1 = R1;
  const Register r_tmp2 = R2;
  const Register r_index_2 = R2;
  const Register r_buffer_3 = R3;
  const Register tmp1 = Rtemp;

  __ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
  // Note: there is a comment in x86 code about not using
  // ExternalAddress / lea, due to relocation not working
  // properly for that address. Should be OK for arm, where we
  // explicitly specify that 'cardtable' has a relocInfo::none
  // type.
  __ lea(r_card_base_1, cardtable);
  __ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTable::card_shift));

  // first quick check without barrier
  __ ldrb(r_tmp2, Address(r_card_addr_0));

  __ cmp(r_tmp2, (int)G1CardTable::g1_young_card_val());
  __ b(recheck, ne);

  __ bind(done);

#ifdef AARCH64
  __ raw_pop(R2, R3);
  __ raw_pop(R0, R1);
#else // AARCH64
  __ pop(saved_regs);
#endif // AARCH64

  __ ret();

  __ bind(recheck);

  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);

  // reload card state after the barrier that ensures the stored oop was visible
  __ ldrb(r_tmp2, Address(r_card_addr_0));

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  __ cbz(r_tmp2, done);

  // storing region crossing non-NULL, card is clean.
  // dirty card and log.
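  //
  // If the card table base address is 256-byte aligned, its low byte is
  // zero, so storing the low byte of r_card_base_1 writes dirty_card_val()
  // (0) without having to materialize a zero in another register.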

  assert(0 == (int)CardTable::dirty_card_val(), "adjust this code");
  if ((ci_card_table_address_as<intptr_t>() & 0xff) == 0) {
    // Card table is aligned so the lowest byte of the table address base is zero.
    __ strb(r_card_base_1, Address(r_card_addr_0));
  } else {
    __ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
  }

  __ ldr(r_index_2, queue_index);
  __ ldr(r_buffer_3, buffer);

  __ subs(r_index_2, r_index_2, wordSize);
  __ b(runtime, lt); // go to runtime if now negative

  __ str(r_index_2, queue_index);

  __ str(r_card_addr_0, Address(r_buffer_3, r_index_2));

  __ b(done);

  __ bind(runtime);

  __ save_live_registers();

  assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
  __ mov(c_rarg1, Rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), c_rarg0, c_rarg1);

  __ restore_live_registers_without_return();

  __ b(done);
}

#undef __

#endif // COMPILER1