1 /* 2 * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "gc/shenandoah/shenandoahBarrierSet.hpp" 27 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 28 #include "gc/shenandoah/shenandoahForwarding.hpp" 29 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 30 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 31 #include "gc/shenandoah/shenandoahHeuristics.hpp" 32 #include "gc/shenandoah/shenandoahRuntime.hpp" 33 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "interpreter/interp_masm.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/thread.hpp" 38 #include "utilities/macros.hpp" 39 #ifdef COMPILER1 40 #include "c1/c1_LIRAssembler.hpp" 41 #include "c1/c1_MacroAssembler.hpp" 42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 43 #endif 44 45 #define __ masm-> 46 47 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 48 49 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 50 Register src, Register dst, Register count) { 51 52 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 53 54 if (is_reference_type(type)) { 55 56 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahStoreValEnqueueBarrier || ShenandoahLoadRefBarrier) { 57 #ifdef _LP64 58 Register thread = r15_thread; 59 #else 60 Register thread = rax; 61 if (thread == src || thread == dst || thread == count) { 62 thread = rbx; 63 } 64 if (thread == src || thread == dst || thread == count) { 65 thread = rcx; 66 } 67 if (thread == src || thread == dst || thread == count) { 68 thread = rdx; 69 } 70 __ push(thread); 71 __ get_thread(thread); 72 #endif 73 assert_different_registers(src, dst, count, thread); 74 75 Label done; 76 // Short-circuit if count == 0. 77 __ testptr(count, count); 78 __ jcc(Assembler::zero, done); 79 80 // Avoid runtime call when not active. 
81 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 82 int flags; 83 if (ShenandoahSATBBarrier && dest_uninitialized) { 84 flags = ShenandoahHeap::HAS_FORWARDED; 85 } else { 86 flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING; 87 } 88 __ testb(gc_state, flags); 89 __ jcc(Assembler::zero, done); 90 91 __ pusha(); // push registers 92 93 #ifdef _LP64 94 assert(src == rdi, "expected"); 95 assert(dst == rsi, "expected"); 96 assert(count == rdx, "expected"); 97 if (UseCompressedOops) { 98 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), 99 src, dst, count); 100 } else 101 #endif 102 { 103 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), 104 src, dst, count); 105 } 106 107 __ popa(); 108 __ bind(done); 109 NOT_LP64(__ pop(thread);) 110 } 111 } 112 113 } 114 115 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 116 Register obj, 117 Register pre_val, 118 Register thread, 119 Register tmp, 120 bool tosca_live, 121 bool expand_call) { 122 123 if (ShenandoahSATBBarrier) { 124 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 125 } 126 } 127 128 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 129 Register obj, 130 Register pre_val, 131 Register thread, 132 Register tmp, 133 bool tosca_live, 134 bool expand_call) { 135 // If expand_call is true then we expand the call_VM_leaf macro 136 // directly to skip generating the check by 137 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
138 139 #ifdef _LP64 140 assert(thread == r15_thread, "must be"); 141 #endif // _LP64 142 143 Label done; 144 Label runtime; 145 146 assert(pre_val != noreg, "check this code"); 147 148 if (obj != noreg) { 149 assert_different_registers(obj, pre_val, tmp); 150 assert(pre_val != rax, "check this code"); 151 } 152 153 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 154 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 155 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 156 157 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 158 __ testb(gc_state, ShenandoahHeap::MARKING); 159 __ jcc(Assembler::zero, done); 160 161 // Do we need to load the previous value? 162 if (obj != noreg) { 163 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 164 } 165 166 // Is the previous value null? 167 __ cmpptr(pre_val, (int32_t) NULL_WORD); 168 __ jcc(Assembler::equal, done); 169 170 // Can we store original value in the thread's buffer? 171 // Is index == 0? 172 // (The index field is typed as size_t.) 173 174 __ movptr(tmp, index); // tmp := *index_adr 175 __ cmpptr(tmp, 0); // tmp == 0? 
176 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 177 178 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 179 __ movptr(index, tmp); // *index_adr := tmp 180 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 181 182 // Record the previous value 183 __ movptr(Address(tmp, 0), pre_val); 184 __ jmp(done); 185 186 __ bind(runtime); 187 // save the live input values 188 if(tosca_live) __ push(rax); 189 190 if (obj != noreg && obj != rax) 191 __ push(obj); 192 193 if (pre_val != rax) 194 __ push(pre_val); 195 196 // Calling the runtime using the regular call_VM_leaf mechanism generates 197 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 198 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 199 // 200 // If we care generating the pre-barrier without a frame (e.g. in the 201 // intrinsified Reference.get() routine) then ebp might be pointing to 202 // the caller frame and so this check will most likely fail at runtime. 203 // 204 // Expanding the call directly bypasses the generation of the check. 205 // So when we do not have have a full interpreter frame on the stack 206 // expand_call should be passed true. 207 208 NOT_LP64( __ push(thread); ) 209 210 #ifdef _LP64 211 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 212 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 213 // Note: this should not accidentally smash thread, because thread is always r15. 
214 assert(thread != c_rarg0, "smashed arg"); 215 if (c_rarg0 != pre_val) { 216 __ mov(c_rarg0, pre_val); 217 } 218 #endif 219 220 if (expand_call) { 221 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 222 #ifdef _LP64 223 if (c_rarg1 != thread) { 224 __ mov(c_rarg1, thread); 225 } 226 // Already moved pre_val into c_rarg0 above 227 #else 228 __ push(thread); 229 __ push(pre_val); 230 #endif 231 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 232 } else { 233 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 234 } 235 236 NOT_LP64( __ pop(thread); ) 237 238 // save the live input values 239 if (pre_val != rax) 240 __ pop(pre_val); 241 242 if (obj != noreg && obj != rax) 243 __ pop(obj); 244 245 if(tosca_live) __ pop(rax); 246 247 __ bind(done); 248 } 249 250 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { 251 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 252 253 Label done; 254 255 #ifdef _LP64 256 Register thread = r15_thread; 257 #else 258 Register thread = rcx; 259 if (thread == dst) { 260 thread = rbx; 261 } 262 __ push(thread); 263 __ get_thread(thread); 264 #endif 265 266 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 267 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 268 __ jccb(Assembler::zero, done); 269 270 // Use rsi for src address 271 const Register src_addr = rsi; 272 // Setup address parameter first, if it does not clobber oop in dst 273 bool need_addr_setup = (src_addr != dst); 274 275 if (need_addr_setup) { 276 __ push(src_addr); 277 __ lea(src_addr, src); 278 279 if (dst != rax) { 280 // Move obj into rax and save rax 281 __ push(rax); 282 __ movptr(rax, dst); 283 } 284 } else { 285 // dst == rsi 286 __ push(rax); 287 __ movptr(rax, dst); 288 289 // we can clobber it, since 
it is outgoing register 290 __ lea(src_addr, src); 291 } 292 293 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 294 295 if (need_addr_setup) { 296 if (dst != rax) { 297 __ movptr(dst, rax); 298 __ pop(rax); 299 } 300 __ pop(src_addr); 301 } else { 302 __ movptr(dst, rax); 303 __ pop(rax); 304 } 305 306 __ bind(done); 307 308 #ifndef _LP64 309 __ pop(thread); 310 #endif 311 } 312 313 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { 314 if (!ShenandoahLoadRefBarrier) { 315 return; 316 } 317 318 Label done; 319 Label not_null; 320 Label slow_path; 321 __ block_comment("load_reference_barrier_native { "); 322 323 // null check 324 __ testptr(dst, dst); 325 __ jcc(Assembler::notZero, not_null); 326 __ jmp(done); 327 __ bind(not_null); 328 329 330 #ifdef _LP64 331 Register thread = r15_thread; 332 #else 333 Register thread = rcx; 334 if (thread == dst) { 335 thread = rbx; 336 } 337 __ push(thread); 338 __ get_thread(thread); 339 #endif 340 assert_different_registers(dst, thread); 341 342 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 343 __ testb(gc_state, ShenandoahHeap::EVACUATION); 344 #ifndef _LP64 345 __ pop(thread); 346 #endif 347 __ jccb(Assembler::notZero, slow_path); 348 __ jmp(done); 349 __ bind(slow_path); 350 351 if (dst != rax) { 352 __ push(rax); 353 } 354 __ push(rcx); 355 __ push(rdx); 356 __ push(rdi); 357 __ push(rsi); 358 #ifdef _LP64 359 __ push(r8); 360 __ push(r9); 361 __ push(r10); 362 __ push(r11); 363 __ push(r12); 364 __ push(r13); 365 __ push(r14); 366 __ push(r15); 367 #endif 368 369 assert_different_registers(dst, rsi); 370 __ lea(rsi, src); 371 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); 372 373 #ifdef _LP64 374 __ pop(r15); 375 __ pop(r14); 376 __ pop(r13); 377 __ pop(r12); 378 __ pop(r11); 379 __ pop(r10); 380 __ pop(r9); 381 
__ pop(r8); 382 #endif 383 __ pop(rsi); 384 __ pop(rdi); 385 __ pop(rdx); 386 __ pop(rcx); 387 388 if (dst != rax) { 389 __ movptr(dst, rax); 390 __ pop(rax); 391 } 392 393 __ bind(done); 394 __ block_comment("load_reference_barrier_native { "); 395 } 396 397 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 398 if (ShenandoahStoreValEnqueueBarrier) { 399 storeval_barrier_impl(masm, dst, tmp); 400 } 401 } 402 403 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 404 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 405 406 if (dst == noreg) return; 407 408 if (ShenandoahStoreValEnqueueBarrier) { 409 // The set of registers to be saved+restored is the same as in the write-barrier above. 410 // Those are the commonly used registers in the interpreter. 411 __ pusha(); 412 // __ push_callee_saved_registers(); 413 __ subptr(rsp, 2 * Interpreter::stackElementSize); 414 __ movdbl(Address(rsp, 0), xmm0); 415 416 #ifdef _LP64 417 Register thread = r15_thread; 418 #else 419 Register thread = rcx; 420 if (thread == dst || thread == tmp) { 421 thread = rdi; 422 } 423 if (thread == dst || thread == tmp) { 424 thread = rbx; 425 } 426 __ get_thread(thread); 427 #endif 428 assert_different_registers(dst, tmp, thread); 429 430 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 431 __ movdbl(xmm0, Address(rsp, 0)); 432 __ addptr(rsp, 2 * Interpreter::stackElementSize); 433 //__ pop_callee_saved_registers(); 434 __ popa(); 435 } 436 } 437 438 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { 439 if (ShenandoahLoadRefBarrier) { 440 Label done; 441 __ testptr(dst, dst); 442 __ jcc(Assembler::zero, done); 443 load_reference_barrier_not_null(masm, dst, src); 444 __ bind(done); 445 } 446 } 447 448 // 449 // Arguments: 450 // 451 // Inputs: 452 // src: oop location, might be clobbered 453 
//   tmp1:       scratch register, might not be valid.
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   tmp1 (if it is valid)
//
// Emits a load of `type` from src into dst. Non-reference loads are delegated
// to BarrierSetAssembler::load_at unchanged. Reference loads additionally get
// a load-reference barrier and/or a keep-alive (SATB) barrier, as decided by
// ShenandoahBarrierSet::need_load_reference_barrier() and
// ShenandoahBarrierSet::need_keep_alive_barrier() for the given decorators.
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
             Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB
    // (the barrier below is passed src again, so the load must not overwrite
    // src's base or index register)
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        // Fall back to rdi; its old value is saved here and restored by the
        // pop(dst) after the result has been moved to result_dst.
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
      load_reference_barrier_native(masm, dst, src);
    } else {
      load_reference_barrier(masm, dst, src);
    }

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ push_IU_state();
    // That path can be reached from the c2i adapter with live fp
    // arguments in registers.
    LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
    // 64 bytes of stack: one 8-byte slot for each of xmm0..xmm7.
    __ subptr(rsp, 64);
    __ movdbl(Address(rsp, 0), xmm0);
    __ movdbl(Address(rsp, 8), xmm1);
    __ movdbl(Address(rsp, 16), xmm2);
    __ movdbl(Address(rsp, 24), xmm3);
    __ movdbl(Address(rsp, 32), xmm4);
    __ movdbl(Address(rsp, 40), xmm5);
    __ movdbl(Address(rsp, 48), xmm6);
    __ movdbl(Address(rsp, 56), xmm7);

    // 64-bit always uses r15_thread; 32-bit uses tmp_thread, or rdx when
    // tmp_thread is not a valid register.
    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);
    __ movdbl(xmm0, Address(rsp, 0));
    __ movdbl(xmm1, Address(rsp, 8));
    __ movdbl(xmm2, Address(rsp, 16));
    __ movdbl(xmm3, Address(rsp, 24));
    __ movdbl(xmm4, Address(rsp, 32));
    __ movdbl(xmm5, Address(rsp, 40));
    __ movdbl(xmm6, Address(rsp, 48));
    __ movdbl(xmm7, Address(rsp, 56));
    __ addptr(rsp, 64);
    __ pop_IU_state();
  }
}

// Emits a store of val to dst. For in-heap reference stores the destination
// address is first flattened into tmp1, then the SATB pre-barrier (for
// AS_NORMAL accesses) and the store-value barrier (when val is not noreg) are
// emitted before the actual store. All other stores are delegated to
// BarrierSetAssembler::store_at unchanged.
//
// tmp1 and tmp2 are clobbered on the in-heap reference path, as is the fixed
// scratch register tmp3 (r8 on 64-bit, rsi on 32-bit).
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
              Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    // NOTE(review): this cast assumes masm is an InterpreterMacroAssembler on
    // 32-bit; confirm against the callers of store_at.
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3  /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    // The store itself is identical in both branches; only a store with a
    // value register gets the store-value barrier first.
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Emits the jobject resolution used from native code: after the generic
// BarrierSetAssembler resolution, a non-null obj is sent to slowpath when the
// EVACUATION bit of gc_state is set.
//
// gc_state is addressed relative to jni_env, using the difference between the
// gc_state offset and JavaThread::jni_environment_offset().
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
//
//   res:      receives 1 on success and 0 on failure; written only when
//             exchange is false
//   addr:     memory location to update
//   oldval:   expected value; must be rax. Overwritten by the CAS sequence.
//   newval:   value to install
//   exchange: when true no boolean result is produced and the witness value
//             is left in oldval
//   tmp1:     scratch; keeps a copy of the original oldval
//   tmp2:     scratch; also holds the thread pointer on 32-bit
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching to resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
  // (shift right then left by 2 clears the two lowest bits)
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// Emits the out-of-line C1 stub for the SATB pre-barrier: optionally loads the
// previous value, returns to the continuation if it is null, otherwise passes
// it as parameter 0 to the C1 pre-barrier runtime code blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// Emits the out-of-line C1 stub for the load-reference barrier. The object is
// moved into res (which must be rax); a null object or one whose
// in_cset_fast_test entry is zero returns straight to the continuation.
// Otherwise res and addr are stored as parameters 0 and 1 and the native or
// regular load-reference-barrier runtime code blob is called.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  // (index = object address >> region size shift, into the byte table at
  // in_cset_fast_test_addr())
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  // Reached by fall-through only; nothing in this function jumps to slow_path.
  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (stub->is_native()) {
    __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
  } else {
    __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
  }
  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Generates the C1 runtime stub behind gen_pre_barrier_stub(): while MARKING
// is set, the previous value (parameter 0) is stored into the thread's SATB
// buffer, or handed to ShenandoahRuntime::write_ref_field_pre_entry when the
// queue index is 0. rax and rdx are preserved by the push/pop pair.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  // On 32-bit pre_val and thread are both rax; this load overwrites the
  // thread pointer, which is only read again on the runtime path.
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Generates the C1 runtime stub behind gen_load_reference_barrier_stub():
// loads parameters 0 and 1 and calls the matching ShenandoahRuntime
// load-reference-barrier entry (native, narrow or regular). All live registers
// except rax are restored afterwards, so the call's result stays in rax.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved
  // arg1 : address stored as parameter 1 by gen_load_reference_barrier_stub()

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (is_native) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), c_rarg0, c_rarg1);
  } else if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  if (is_native) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rax, rbx);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
  }
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Returns the entry address of the shared load-reference-barrier stub.
// Asserts that the stub has already been generated.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

/*
 *  Incoming parameters:
 *  rax: oop
 *  rsi: load address
 *
 *  Generates the shared load-reference-barrier stub and returns its entry
 *  address. The stub returns its result in rax:
 *   - the incoming oop unchanged, when its in_cset_fast_test entry is zero;
 *   - the forwarding pointer decoded from the mark word, when both lowest
 *     mark-word bits are set;
 *   - otherwise the result of the ShenandoahRuntime load_reference_barrier
 *     (or _narrow) call made on the slow path.
 */
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  // Save the registers listed below (rax and rsi are not among them), then
  // align rsp and save the FPU state for the runtime call.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
  }
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// Creates the code buffer for the Shenandoah barrier stubs and generates the
// shared load-reference-barrier stub into it. Does nothing when
// ShenandoahLoadRefBarrier is disabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    // NOTE(review): the result of BufferBlob::create is used without a NULL
    // check -- confirm whether creation can fail at this point of VM startup.
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}