1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 
21 * 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/shenandoah/shenandoahBarrierSet.hpp" 26 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 27 #include "gc/shenandoah/shenandoahForwarding.hpp" 28 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 29 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 30 #include "gc/shenandoah/shenandoahHeuristics.hpp" 31 #include "gc/shenandoah/shenandoahRuntime.hpp" 32 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "interpreter/interp_masm.hpp" 35 #include "runtime/sharedRuntime.hpp" 36 #include "runtime/thread.hpp" 37 #include "utilities/macros.hpp" 38 #ifdef COMPILER1 39 #include "c1/c1_LIRAssembler.hpp" 40 #include "c1/c1_MacroAssembler.hpp" 41 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 42 #endif 43 44 #define __ masm-> 45 46 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 47 48 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 49 Register src, Register dst, Register count) { 50 51 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 52 53 if (is_reference_type(type)) { 54 55 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) { 56 #ifdef _LP64 57 Register thread = r15_thread; 58 #else 59 Register thread = rax; 60 if (thread == src || thread == dst || thread == count) { 61 thread = rbx; 62 } 63 if (thread == src || thread == dst || thread == count) { 64 thread = rcx; 65 } 66 if (thread == src || thread == dst || thread == count) { 67 thread = rdx; 68 } 69 __ push(thread); 70 __ get_thread(thread); 71 #endif 72 assert_different_registers(src, dst, count, thread); 73 74 Label done; 75 // Short-circuit if count == 0. 76 __ testptr(count, count); 77 __ jcc(Assembler::zero, done); 78 79 // Avoid runtime call when not marking. 
80 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 81 int flags = ShenandoahHeap::HAS_FORWARDED; 82 if (!dest_uninitialized) { 83 flags |= ShenandoahHeap::MARKING; 84 } 85 __ testb(gc_state, flags); 86 __ jcc(Assembler::zero, done); 87 88 __ pusha(); // push registers 89 #ifdef _LP64 90 assert(src == rdi, "expected"); 91 assert(dst == rsi, "expected"); 92 assert(count == rdx, "expected"); 93 if (UseCompressedOops) { 94 if (dest_uninitialized) { 95 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count); 96 } else { 97 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count); 98 } 99 } else 100 #endif 101 { 102 if (dest_uninitialized) { 103 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count); 104 } else { 105 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count); 106 } 107 } 108 __ popa(); 109 __ bind(done); 110 NOT_LP64(__ pop(thread);) 111 } 112 } 113 114 } 115 116 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 117 Register obj, 118 Register pre_val, 119 Register thread, 120 Register tmp, 121 bool tosca_live, 122 bool expand_call) { 123 124 if (ShenandoahSATBBarrier) { 125 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 126 } 127 } 128 129 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 130 Register obj, 131 Register pre_val, 132 Register thread, 133 Register tmp, 134 bool tosca_live, 135 bool expand_call) { 136 // If expand_call is true then we expand the call_VM_leaf macro 137 // directly to skip generating the check by 138 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
139 140 #ifdef _LP64 141 assert(thread == r15_thread, "must be"); 142 #endif // _LP64 143 144 Label done; 145 Label runtime; 146 147 assert(pre_val != noreg, "check this code"); 148 149 if (obj != noreg) { 150 assert_different_registers(obj, pre_val, tmp); 151 assert(pre_val != rax, "check this code"); 152 } 153 154 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 155 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 156 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 157 158 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 159 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 160 __ jcc(Assembler::zero, done); 161 162 // Do we need to load the previous value? 163 if (obj != noreg) { 164 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 165 } 166 167 // Is the previous value null? 168 __ cmpptr(pre_val, (int32_t) NULL_WORD); 169 __ jcc(Assembler::equal, done); 170 171 // Can we store original value in the thread's buffer? 172 // Is index == 0? 173 // (The index field is typed as size_t.) 174 175 __ movptr(tmp, index); // tmp := *index_adr 176 __ cmpptr(tmp, 0); // tmp == 0? 
177 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 178 179 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 180 __ movptr(index, tmp); // *index_adr := tmp 181 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 182 183 // Record the previous value 184 __ movptr(Address(tmp, 0), pre_val); 185 __ jmp(done); 186 187 __ bind(runtime); 188 // save the live input values 189 if(tosca_live) __ push(rax); 190 191 if (obj != noreg && obj != rax) 192 __ push(obj); 193 194 if (pre_val != rax) 195 __ push(pre_val); 196 197 // Calling the runtime using the regular call_VM_leaf mechanism generates 198 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 199 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 200 // 201 // If we care generating the pre-barrier without a frame (e.g. in the 202 // intrinsified Reference.get() routine) then ebp might be pointing to 203 // the caller frame and so this check will most likely fail at runtime. 204 // 205 // Expanding the call directly bypasses the generation of the check. 206 // So when we do not have have a full interpreter frame on the stack 207 // expand_call should be passed true. 208 209 NOT_LP64( __ push(thread); ) 210 211 #ifdef _LP64 212 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 213 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 214 // Note: this should not accidentally smash thread, because thread is always r15. 
215 assert(thread != c_rarg0, "smashed arg"); 216 if (c_rarg0 != pre_val) { 217 __ mov(c_rarg0, pre_val); 218 } 219 #endif 220 221 if (expand_call) { 222 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 223 #ifdef _LP64 224 if (c_rarg1 != thread) { 225 __ mov(c_rarg1, thread); 226 } 227 // Already moved pre_val into c_rarg0 above 228 #else 229 __ push(thread); 230 __ push(pre_val); 231 #endif 232 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 233 } else { 234 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 235 } 236 237 NOT_LP64( __ pop(thread); ) 238 239 // save the live input values 240 if (pre_val != rax) 241 __ pop(pre_val); 242 243 if (obj != noreg && obj != rax) 244 __ pop(obj); 245 246 if(tosca_live) __ pop(rax); 247 248 __ bind(done); 249 } 250 251 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { 252 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 253 254 Label done; 255 256 #ifdef _LP64 257 Register thread = r15_thread; 258 #else 259 Register thread = rcx; 260 if (thread == dst) { 261 thread = rbx; 262 } 263 __ push(thread); 264 __ get_thread(thread); 265 #endif 266 267 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 268 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 269 __ jccb(Assembler::zero, done); 270 271 // Use rsi for src address 272 const Register src_addr = rsi; 273 // Setup address parameter first, if it does not clobber oop in dst 274 bool need_addr_setup = (src_addr != dst); 275 276 if (need_addr_setup) { 277 __ push(src_addr); 278 __ lea(src_addr, src); 279 280 if (dst != rax) { 281 // Move obj into rax and save rax 282 __ push(rax); 283 __ movptr(rax, dst); 284 } 285 } else { 286 // dst == rsi 287 __ push(rax); 288 __ movptr(rax, dst); 289 290 // we can clobber it, since 
it is outgoing register 291 __ lea(src_addr, src); 292 } 293 294 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 295 296 if (need_addr_setup) { 297 if (dst != rax) { 298 __ movptr(dst, rax); 299 __ pop(rax); 300 } 301 __ pop(src_addr); 302 } else { 303 __ movptr(dst, rax); 304 __ pop(rax); 305 } 306 307 __ bind(done); 308 309 #ifndef _LP64 310 __ pop(thread); 311 #endif 312 } 313 314 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { 315 if (!ShenandoahLoadRefBarrier) { 316 return; 317 } 318 319 Label done; 320 Label not_null; 321 Label slow_path; 322 __ block_comment("load_reference_barrier_native { "); 323 324 // null check 325 __ testptr(dst, dst); 326 __ jcc(Assembler::notZero, not_null); 327 __ jmp(done); 328 __ bind(not_null); 329 330 331 #ifdef _LP64 332 Register thread = r15_thread; 333 #else 334 Register thread = rcx; 335 if (thread == dst) { 336 thread = rbx; 337 } 338 __ push(thread); 339 __ get_thread(thread); 340 #endif 341 assert_different_registers(dst, thread); 342 343 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 344 __ testb(gc_state, ShenandoahHeap::EVACUATION); 345 #ifndef _LP64 346 __ pop(thread); 347 #endif 348 __ jccb(Assembler::notZero, slow_path); 349 __ jmp(done); 350 __ bind(slow_path); 351 352 if (dst != rax) { 353 __ push(rax); 354 } 355 __ push(rcx); 356 __ push(rdx); 357 __ push(rdi); 358 __ push(rsi); 359 #ifdef _LP64 360 __ push(r8); 361 __ push(r9); 362 __ push(r10); 363 __ push(r11); 364 __ push(r12); 365 __ push(r13); 366 __ push(r14); 367 __ push(r15); 368 #endif 369 370 assert_different_registers(dst, rsi); 371 __ lea(rsi, src); 372 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); 373 374 #ifdef _LP64 375 __ pop(r15); 376 __ pop(r14); 377 __ pop(r13); 378 __ pop(r12); 379 __ pop(r11); 380 __ pop(r10); 381 __ pop(r9); 382 
__ pop(r8); 383 #endif 384 __ pop(rsi); 385 __ pop(rdi); 386 __ pop(rdx); 387 __ pop(rcx); 388 389 if (dst != rax) { 390 __ movptr(dst, rax); 391 __ pop(rax); 392 } 393 394 __ bind(done); 395 __ block_comment("load_reference_barrier_native { "); 396 } 397 398 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 399 if (ShenandoahStoreValEnqueueBarrier) { 400 storeval_barrier_impl(masm, dst, tmp); 401 } 402 } 403 404 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 405 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 406 407 if (dst == noreg) return; 408 409 if (ShenandoahStoreValEnqueueBarrier) { 410 // The set of registers to be saved+restored is the same as in the write-barrier above. 411 // Those are the commonly used registers in the interpreter. 412 __ pusha(); 413 // __ push_callee_saved_registers(); 414 __ subptr(rsp, 2 * Interpreter::stackElementSize); 415 __ movdbl(Address(rsp, 0), xmm0); 416 417 #ifdef _LP64 418 Register thread = r15_thread; 419 #else 420 Register thread = rcx; 421 if (thread == dst || thread == tmp) { 422 thread = rdi; 423 } 424 if (thread == dst || thread == tmp) { 425 thread = rbx; 426 } 427 __ get_thread(thread); 428 #endif 429 assert_different_registers(dst, tmp, thread); 430 431 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 432 __ movdbl(xmm0, Address(rsp, 0)); 433 __ addptr(rsp, 2 * Interpreter::stackElementSize); 434 //__ pop_callee_saved_registers(); 435 __ popa(); 436 } 437 } 438 439 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { 440 if (ShenandoahLoadRefBarrier) { 441 Label done; 442 __ testptr(dst, dst); 443 __ jcc(Assembler::zero, done); 444 load_reference_barrier_not_null(masm, dst, src); 445 __ bind(done); 446 } 447 } 448 449 // 450 // Arguments: 451 // 452 // Inputs: 453 // src: oop location, might be clobbered 454 
//   tmp1:       scratch register, might not be valid.
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   tmp1 (if it is valid)
//
// Loads a value from src into dst. For reference loads, applies the
// load-reference barrier (native flavor when required by the decorators)
// and, if a keep-alive barrier is needed, the SATB pre-barrier on the
// loaded referent.
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
             Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        // No usable scratch register: spill rdi and use it as the temporary dst.
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
      load_reference_barrier_native(masm, dst, src);
    } else {
      load_reference_barrier(masm, dst, src);
    }

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        // rdi was spilled above; restore it.
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ push_IU_state();
    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);
    __ pop_IU_state();
  }
}

// Stores val at dst. For in-heap reference stores, emits the SATB
// pre-barrier (for AS_NORMAL accesses) and the store-value barrier before
// delegating the raw store to the base BarrierSetAssembler.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
              Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    // NOTE(review): this cast assumes store_at is only reached from
    // interpreter-generated code on x86_32, where bcp must be preserved
    // across the barrier runtime calls.
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      // Storing null: no store-value barrier needed.
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Resolves a jobject in native code. After the base resolution, non-null
// oops must go to the slow path while evacuation is in progress, since the
// resolved oop may be a from-space copy.
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  // Only jni_env is available here, not the thread: reach gc_state relative
  // to the JNIEnv embedded in JavaThread.
  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
//
//   res:      result register (boolean 0/1) when exchange == false
//   addr:     memory location being CASed
//   oldval:   expected value; must be rax (implicit cmpxchg operand);
//             on CAE (exchange == true) failure, holds the witness value
//   newval:   replacement value
//   exchange: true for compare-and-exchange, false for compare-and-set
//   tmp1/2:   scratch registers
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                Register res, Address addr, Register oldval, Register newval,
                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
  #ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
  #endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
  #ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
  #endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching to resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
  #ifdef _LP64
  const Register thread = r15_thread;
  #else
  const Register thread = tmp2;
  __ get_thread(thread);
  #endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

  // Copy the offending in-memory value (cmpxchg left it in oldval/rax)
  // into tmp2 for resolution.
  #ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
  #endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

  #ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
  #endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

  #ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
  #endif

  if (os::is_MP()) __ lock();
  #ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
  #endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
  #ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
  #endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
  #ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
  #endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// C1 slow-path stub for the SATB pre-barrier: loads the previous value if
// requested, skips null values, and calls the shared pre-barrier runtime
// code blob with pre_val as parameter 0.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// C1 slow-path stub for the load-reference barrier: fast-exits for null oops
// and oops outside the collection set, otherwise calls the shared LRB runtime
// code blob. The result arrives in rax.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  // Index the per-region in-cset byte map with (oop >> region_size_shift).
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Shared C1 runtime stub for the SATB pre-barrier. Tries to enqueue the
// previous value (stub parameter 0) into the thread-local SATB buffer
// inline; falls back to ShenandoahRuntime::write_ref_field_pre_entry with
// all live registers saved.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  // Buffer fills downwards: decrement index, store at the new slot.
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  // On x86_32 this clobbers thread (also rax), but thread is no longer
  // needed on this path.
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Shared C1 runtime stub for the load-reference barrier. Calls the
// (narrow-oop-aware on LP64) ShenandoahRuntime::load_reference_barrier*
// entry with the oop (parameter 0) and its load address (parameter 1);
// the result is returned in rax.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Accessor for the generated load-reference-barrier stub; must only be
// called after barrier_stubs_init() has generated it.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

/*
 * Incoming parameters:
 * rax: oop
 * rsi: load address
 *
 * Returns the (possibly forwarded) oop in rax. Fast paths: object not in
 * the collection set, or already forwarded (forwarding pointer decoded
 * from the mark word). Slow path calls the load_reference_barrier runtime
 * with all caller-state preserved.
 */
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  // Save all caller-saved registers (rax holds the argument/result and is
  // intentionally not saved here).
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  // Align the stack for the C call and preserve FPU state.
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
  }
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// Generates the shared load-reference-barrier stub into a fresh BufferBlob
// when ShenandoahLoadRefBarrier is enabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}