1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 21 * 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 26 #include "gc/shenandoah/shenandoahForwarding.hpp" 27 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 28 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 29 #include "gc/shenandoah/shenandoahHeuristics.hpp" 30 #include "gc/shenandoah/shenandoahRuntime.hpp" 31 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 32 #include "interpreter/interpreter.hpp" 33 #include "interpreter/interp_masm.hpp" 34 #include "runtime/sharedRuntime.hpp" 35 #include "runtime/thread.hpp" 36 #include "utilities/macros.hpp" 37 #ifdef COMPILER1 38 #include "c1/c1_LIRAssembler.hpp" 39 #include "c1/c1_MacroAssembler.hpp" 40 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 41 #endif 42 43 #define __ masm-> 44 45 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 46 47 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 48 Register src, Register dst, Register count) { 49 50 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 51 52 if (is_reference_type(type)) { 53 54 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) { 55 #ifdef _LP64 56 Register thread = r15_thread; 57 #else 58 Register thread = rax; 59 if (thread == src || thread == dst || thread == count) { 60 thread = rbx; 61 } 62 if (thread == src || thread == dst || thread == count) { 63 thread = rcx; 64 } 65 if (thread == src || thread == dst || thread == count) { 66 thread = rdx; 67 } 68 __ push(thread); 69 __ get_thread(thread); 70 #endif 71 assert_different_registers(src, dst, count, thread); 72 73 Label done; 74 // Short-circuit if count == 0. 75 __ testptr(count, count); 76 __ jcc(Assembler::zero, done); 77 78 // Avoid runtime call when not marking. 79 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 80 int flags = ShenandoahHeap::HAS_FORWARDED; 81 if (!dest_uninitialized) { 82 flags |= ShenandoahHeap::MARKING; 83 } 84 __ testb(gc_state, flags); 85 __ jcc(Assembler::zero, done); 86 87 __ pusha(); // push registers 88 #ifdef _LP64 89 assert(src == rdi, "expected"); 90 assert(dst == rsi, "expected"); 91 assert(count == rdx, "expected"); 92 if (UseCompressedOops) { 93 if (dest_uninitialized) { 94 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count); 95 } else { 96 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count); 97 } 98 } else 99 #endif 100 { 101 if (dest_uninitialized) { 102 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count); 103 } else { 104 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count); 105 } 106 } 107 __ popa(); 108 __ bind(done); 109 NOT_LP64(__ pop(thread);) 110 } 111 } 112 113 } 114 115 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 116 Register obj, 117 Register pre_val, 118 Register thread, 119 Register tmp, 120 bool tosca_live, 121 bool expand_call) { 122 123 if (ShenandoahSATBBarrier) { 124 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 125 } 126 } 127 128 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 129 Register obj, 130 Register pre_val, 131 Register thread, 132 Register tmp, 133 bool tosca_live, 134 bool expand_call) { 135 // If expand_call is true then we expand the call_VM_leaf macro 136 // directly to skip generating the check by 137 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 138 139 #ifdef _LP64 140 assert(thread == r15_thread, "must be"); 141 #endif // _LP64 142 143 Label done; 144 Label runtime; 145 146 assert(pre_val != noreg, "check this code"); 147 148 if (obj != noreg) { 149 assert_different_registers(obj, pre_val, tmp); 150 assert(pre_val != rax, "check this code"); 151 } 152 153 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 154 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 155 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 156 157 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 158 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 159 __ jcc(Assembler::zero, done); 160 161 // Do we need to load the previous value? 162 if (obj != noreg) { 163 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 164 } 165 166 // Is the previous value null? 167 __ cmpptr(pre_val, (int32_t) NULL_WORD); 168 __ jcc(Assembler::equal, done); 169 170 // Can we store original value in the thread's buffer? 171 // Is index == 0? 172 // (The index field is typed as size_t.) 173 174 __ movptr(tmp, index); // tmp := *index_adr 175 __ cmpptr(tmp, 0); // tmp == 0? 176 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 177 178 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 179 __ movptr(index, tmp); // *index_adr := tmp 180 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 181 182 // Record the previous value 183 __ movptr(Address(tmp, 0), pre_val); 184 __ jmp(done); 185 186 __ bind(runtime); 187 // save the live input values 188 if(tosca_live) __ push(rax); 189 190 if (obj != noreg && obj != rax) 191 __ push(obj); 192 193 if (pre_val != rax) 194 __ push(pre_val); 195 196 // Calling the runtime using the regular call_VM_leaf mechanism generates 197 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 198 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 199 // 200 // If we care generating the pre-barrier without a frame (e.g. in the 201 // intrinsified Reference.get() routine) then ebp might be pointing to 202 // the caller frame and so this check will most likely fail at runtime. 203 // 204 // Expanding the call directly bypasses the generation of the check. 205 // So when we do not have have a full interpreter frame on the stack 206 // expand_call should be passed true. 207 208 NOT_LP64( __ push(thread); ) 209 210 #ifdef _LP64 211 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 212 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 213 // Note: this should not accidentally smash thread, because thread is always r15. 214 assert(thread != c_rarg0, "smashed arg"); 215 if (c_rarg0 != pre_val) { 216 __ mov(c_rarg0, pre_val); 217 } 218 #endif 219 220 if (expand_call) { 221 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 222 #ifdef _LP64 223 if (c_rarg1 != thread) { 224 __ mov(c_rarg1, thread); 225 } 226 // Already moved pre_val into c_rarg0 above 227 #else 228 __ push(thread); 229 __ push(pre_val); 230 #endif 231 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 232 } else { 233 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 234 } 235 236 NOT_LP64( __ pop(thread); ) 237 238 // save the live input values 239 if (pre_val != rax) 240 __ pop(pre_val); 241 242 if (obj != noreg && obj != rax) 243 __ pop(obj); 244 245 if(tosca_live) __ pop(rax); 246 247 __ bind(done); 248 } 249 250 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { 251 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 252 253 Label done; 254 255 #ifdef _LP64 256 Register thread = r15_thread; 257 #else 258 Register thread = rcx; 259 if (thread == dst) { 260 thread = rbx; 261 } 262 __ push(thread); 263 __ get_thread(thread); 264 #endif 265 266 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 267 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 268 __ jccb(Assembler::zero, done); 269 270 // Use rsi for src address 271 const Register src_addr = rsi; 272 // Setup address parameter first, if it does not clobber oop in dst 273 bool need_addr_setup = (src_addr != dst); 274 275 if (need_addr_setup) { 276 __ push(src_addr); 277 __ lea(src_addr, src); 278 279 if (dst != rax) { 280 // Move obj into rax and save rax 281 __ push(rax); 282 __ movptr(rax, dst); 283 } 284 } else { 285 // dst == rsi 286 __ push(rax); 287 __ movptr(rax, dst); 288 289 // we can clobber it, since it is outgoing register 290 __ lea(src_addr, src); 291 } 292 293 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 294 295 if (need_addr_setup) { 296 if (dst != rax) { 297 __ movptr(dst, rax); 298 __ pop(rax); 299 } 300 __ pop(src_addr); 301 } else { 302 __ movptr(dst, rax); 303 __ pop(rax); 304 } 305 306 __ bind(done); 307 308 #ifndef _LP64 309 __ pop(thread); 310 #endif 311 } 312 313 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { 314 if (!ShenandoahLoadRefBarrier) { 315 return; 316 } 317 318 Label done; 319 Label not_null; 320 Label slow_path; 321 __ block_comment("load_reference_barrier_native { "); 322 323 // null check 324 __ testptr(dst, dst); 325 __ jcc(Assembler::notZero, not_null); 326 __ jmp(done); 327 __ bind(not_null); 328 329 330 #ifdef _LP64 331 Register thread = r15_thread; 332 #else 333 Register thread = rcx; 334 if (thread == dst) { 335 thread = rbx; 336 } 337 __ push(thread); 338 __ get_thread(thread); 339 #endif 340 assert_different_registers(dst, thread); 341 342 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 343 __ testb(gc_state, ShenandoahHeap::EVACUATION); 344 #ifndef _LP64 345 __ pop(thread); 346 #endif 347 __ jccb(Assembler::notZero, slow_path); 348 __ jmp(done); 349 __ bind(slow_path); 350 351 if (dst != rax) { 352 __ push(rax); 353 } 354 __ push(rcx); 355 __ push(rdx); 356 __ push(rdi); 357 __ push(rsi); 358 #ifdef _LP64 359 __ push(r8); 360 __ push(r9); 361 __ push(r10); 362 __ push(r11); 363 __ push(r12); 364 __ push(r13); 365 __ push(r14); 366 __ push(r15); 367 #endif 368 369 assert_different_registers(dst, rsi); 370 __ lea(rsi, src); 371 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); 372 373 #ifdef _LP64 374 __ pop(r15); 375 __ pop(r14); 376 __ pop(r13); 377 __ pop(r12); 378 __ pop(r11); 379 __ pop(r10); 380 __ pop(r9); 381 __ pop(r8); 382 #endif 383 __ pop(rsi); 384 __ pop(rdi); 385 __ pop(rdx); 386 __ pop(rcx); 387 388 if (dst != rax) { 389 __ movptr(dst, rax); 390 __ pop(rax); 391 } 392 393 __ bind(done); 394 __ block_comment("load_reference_barrier_native { "); 395 } 396 397 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 398 if (ShenandoahStoreValEnqueueBarrier) { 399 storeval_barrier_impl(masm, dst, tmp); 400 } 401 } 402 403 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 404 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 405 406 if (dst == noreg) return; 407 408 if (ShenandoahStoreValEnqueueBarrier) { 409 // The set of registers to be saved+restored is the same as in the write-barrier above. 410 // Those are the commonly used registers in the interpreter. 411 __ pusha(); 412 // __ push_callee_saved_registers(); 413 __ subptr(rsp, 2 * Interpreter::stackElementSize); 414 __ movdbl(Address(rsp, 0), xmm0); 415 416 #ifdef _LP64 417 Register thread = r15_thread; 418 #else 419 Register thread = rcx; 420 if (thread == dst || thread == tmp) { 421 thread = rdi; 422 } 423 if (thread == dst || thread == tmp) { 424 thread = rbx; 425 } 426 __ get_thread(thread); 427 #endif 428 assert_different_registers(dst, tmp, thread); 429 430 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 431 __ movdbl(xmm0, Address(rsp, 0)); 432 __ addptr(rsp, 2 * Interpreter::stackElementSize); 433 //__ pop_callee_saved_registers(); 434 __ popa(); 435 } 436 } 437 438 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { 439 if (ShenandoahLoadRefBarrier) { 440 Label done; 441 __ testptr(dst, dst); 442 __ jcc(Assembler::zero, done); 443 load_reference_barrier_not_null(masm, dst, src); 444 __ bind(done); 445 } 446 } 447 448 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 449 Register dst, Address src, Register tmp1, Register tmp_thread) { 450 bool on_oop = is_reference_type(type); 451 bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; 452 bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; 453 bool not_in_heap = (decorators & IN_NATIVE) != 0; 454 bool on_reference = on_weak || on_phantom; 455 bool is_traversal_mode = ShenandoahHeap::heap()->is_traversal_mode(); 456 bool keep_alive = ((decorators & AS_NO_KEEPALIVE) == 0) || is_traversal_mode; 457 458 Register result_dst = dst; 459 bool use_tmp1_for_dst = false; 460 461 if (on_oop) { 462 // We want to preserve src 463 if (dst == src.base() || dst == src.index()) { 464 // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at() 465 if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) { 466 dst = tmp1; 467 use_tmp1_for_dst = true; 468 } else { 469 dst = rdi; 470 __ push(dst); 471 } 472 } 473 assert_different_registers(dst, src.base(), src.index()); 474 } 475 476 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 477 478 if (on_oop) { 479 if (not_in_heap && !is_traversal_mode) { 480 load_reference_barrier_native(masm, dst, src); 481 } else { 482 load_reference_barrier(masm, dst, src); 483 } 484 485 if (dst != result_dst) { 486 __ movptr(result_dst, dst); 487 488 if (!use_tmp1_for_dst) { 489 __ pop(dst); 490 } 491 492 dst = result_dst; 493 } 494 495 if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) { 496 const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); 497 assert_different_registers(dst, tmp1, tmp_thread); 498 NOT_LP64(__ get_thread(thread)); 499 // Generate the SATB pre-barrier code to log the value of 500 // the referent field in an SATB buffer. 501 shenandoah_write_barrier_pre(masm /* masm */, 502 noreg /* obj */, 503 dst /* pre_val */, 504 thread /* thread */, 505 tmp1 /* tmp */, 506 true /* tosca_live */, 507 true /* expand_call */); 508 } 509 } 510 } 511 512 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 513 Address dst, Register val, Register tmp1, Register tmp2) { 514 515 bool on_oop = is_reference_type(type); 516 bool in_heap = (decorators & IN_HEAP) != 0; 517 bool as_normal = (decorators & AS_NORMAL) != 0; 518 if (on_oop && in_heap) { 519 bool needs_pre_barrier = as_normal; 520 521 Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi); 522 Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); 523 // flatten object address if needed 524 // We do it regardless of precise because we need the registers 525 if (dst.index() == noreg && dst.disp() == 0) { 526 if (dst.base() != tmp1) { 527 __ movptr(tmp1, dst.base()); 528 } 529 } else { 530 __ lea(tmp1, dst); 531 } 532 533 assert_different_registers(val, tmp1, tmp2, tmp3, rthread); 534 535 #ifndef _LP64 536 __ get_thread(rthread); 537 InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm); 538 imasm->save_bcp(); 539 #endif 540 541 if (needs_pre_barrier) { 542 shenandoah_write_barrier_pre(masm /*masm*/, 543 tmp1 /* obj */, 544 tmp2 /* pre_val */, 545 rthread /* thread */, 546 tmp3 /* tmp */, 547 val != noreg /* tosca_live */, 548 false /* expand_call */); 549 } 550 if (val == noreg) { 551 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 552 } else { 553 storeval_barrier(masm, val, tmp3); 554 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 555 } 556 NOT_LP64(imasm->restore_bcp()); 557 } else { 558 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); 559 } 560 } 561 562 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, 563 Register obj, Register tmp, Label& slowpath) { 564 Label done; 565 // Resolve jobject 566 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); 567 568 // Check for null. 569 __ testptr(obj, obj); 570 __ jcc(Assembler::zero, done); 571 572 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); 573 __ testb(gc_state, ShenandoahHeap::EVACUATION); 574 __ jccb(Assembler::notZero, slowpath); 575 __ bind(done); 576 } 577 578 // Special Shenandoah CAS implementation that handles false negatives 579 // due to concurrent evacuation. 580 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, 581 Register res, Address addr, Register oldval, Register newval, 582 bool exchange, Register tmp1, Register tmp2) { 583 assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); 584 assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); 585 assert_different_registers(oldval, newval, tmp1, tmp2); 586 587 Label L_success, L_failure; 588 589 // Remember oldval for retry logic below 590 #ifdef _LP64 591 if (UseCompressedOops) { 592 __ movl(tmp1, oldval); 593 } else 594 #endif 595 { 596 __ movptr(tmp1, oldval); 597 } 598 599 // Step 1. Fast-path. 600 // 601 // Try to CAS with given arguments. If successful, then we are done. 602 603 if (os::is_MP()) __ lock(); 604 #ifdef _LP64 605 if (UseCompressedOops) { 606 __ cmpxchgl(newval, addr); 607 } else 608 #endif 609 { 610 __ cmpxchgptr(newval, addr); 611 } 612 __ jcc(Assembler::equal, L_success); 613 614 // Step 2. CAS had failed. This may be a false negative. 615 // 616 // The trouble comes when we compare the to-space pointer with the from-space 617 // pointer to the same object. To resolve this, it will suffice to resolve 618 // the value from memory -- this will give both to-space pointers. 619 // If they mismatch, then it was a legitimate failure. 620 // 621 // Before reaching to resolve sequence, see if we can avoid the whole shebang 622 // with filters. 623 624 // Filter: when offending in-memory value is NULL, the failure is definitely legitimate 625 __ testptr(oldval, oldval); 626 __ jcc(Assembler::zero, L_failure); 627 628 // Filter: when heap is stable, the failure is definitely legitimate 629 #ifdef _LP64 630 const Register thread = r15_thread; 631 #else 632 const Register thread = tmp2; 633 __ get_thread(thread); 634 #endif 635 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 636 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 637 __ jcc(Assembler::zero, L_failure); 638 639 #ifdef _LP64 640 if (UseCompressedOops) { 641 __ movl(tmp2, oldval); 642 __ decode_heap_oop(tmp2); 643 } else 644 #endif 645 { 646 __ movptr(tmp2, oldval); 647 } 648 649 // Decode offending in-memory value. 650 // Test if-forwarded 651 __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value); 652 __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded 653 __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded 654 655 // Load and mask forwarding pointer 656 __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes())); 657 __ shrptr(tmp2, 2); 658 __ shlptr(tmp2, 2); 659 660 #ifdef _LP64 661 if (UseCompressedOops) { 662 __ decode_heap_oop(tmp1); // decode for comparison 663 } 664 #endif 665 666 // Now we have the forwarded offender in tmp2. 667 // Compare and if they don't match, we have legitimate failure 668 __ cmpptr(tmp1, tmp2); 669 __ jcc(Assembler::notEqual, L_failure); 670 671 // Step 3. Need to fix the memory ptr before continuing. 672 // 673 // At this point, we have from-space oldval in the register, and its to-space 674 // address is in tmp2. Let's try to update it into memory. We don't care if it 675 // succeeds or not. If it does, then the retrying CAS would see it and succeed. 676 // If this fixup fails, this means somebody else beat us to it, and necessarily 677 // with to-space ptr store. We still have to do the retry, because the GC might 678 // have updated the reference for us. 679 680 #ifdef _LP64 681 if (UseCompressedOops) { 682 __ encode_heap_oop(tmp2); // previously decoded at step 2. 683 } 684 #endif 685 686 if (os::is_MP()) __ lock(); 687 #ifdef _LP64 688 if (UseCompressedOops) { 689 __ cmpxchgl(tmp2, addr); 690 } else 691 #endif 692 { 693 __ cmpxchgptr(tmp2, addr); 694 } 695 696 // Step 4. Try to CAS again. 697 // 698 // This is guaranteed not to have false negatives, because oldval is definitely 699 // to-space, and memory pointer is to-space as well. Nothing is able to store 700 // from-space ptr into memory anymore. Make sure oldval is restored, after being 701 // garbled during retries. 702 // 703 #ifdef _LP64 704 if (UseCompressedOops) { 705 __ movl(oldval, tmp2); 706 } else 707 #endif 708 { 709 __ movptr(oldval, tmp2); 710 } 711 712 if (os::is_MP()) __ lock(); 713 #ifdef _LP64 714 if (UseCompressedOops) { 715 __ cmpxchgl(newval, addr); 716 } else 717 #endif 718 { 719 __ cmpxchgptr(newval, addr); 720 } 721 if (!exchange) { 722 __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump 723 } 724 725 // Step 5. If we need a boolean result out of CAS, set the flag appropriately. 726 // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS. 727 // Otherwise, failure witness for CAE is in oldval on all paths, and we can return. 728 729 if (exchange) { 730 __ bind(L_failure); 731 __ bind(L_success); 732 } else { 733 assert(res != NULL, "need result register"); 734 735 Label exit; 736 __ bind(L_failure); 737 __ xorptr(res, res); 738 __ jmpb(exit); 739 740 __ bind(L_success); 741 __ movptr(res, 1); 742 __ bind(exit); 743 } 744 } 745 746 #undef __ 747 748 #ifdef COMPILER1 749 750 #define __ ce->masm()-> 751 752 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { 753 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 754 // At this point we know that marking is in progress. 755 // If do_load() is true then we have to emit the 756 // load of the previous value; otherwise it has already 757 // been loaded into _pre_val. 758 759 __ bind(*stub->entry()); 760 assert(stub->pre_val()->is_register(), "Precondition."); 761 762 Register pre_val_reg = stub->pre_val()->as_register(); 763 764 if (stub->do_load()) { 765 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); 766 } 767 768 __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); 769 __ jcc(Assembler::equal, *stub->continuation()); 770 ce->store_parameter(stub->pre_val()->as_register(), 0); 771 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); 772 __ jmp(*stub->continuation()); 773 774 } 775 776 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { 777 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 778 __ bind(*stub->entry()); 779 780 Register obj = stub->obj()->as_register(); 781 Register res = stub->result()->as_register(); 782 Register addr = stub->addr()->as_register(); 783 Register tmp1 = stub->tmp1()->as_register(); 784 Register tmp2 = stub->tmp2()->as_register(); 785 assert_different_registers(obj, res, addr, tmp1, tmp2); 786 787 Label slow_path; 788 789 assert(res == rax, "result must arrive in rax"); 790 791 if (res != obj) { 792 __ mov(res, obj); 793 } 794 795 // Check for null. 796 __ testptr(res, res); 797 __ jcc(Assembler::zero, *stub->continuation()); 798 799 // Check for object being in the collection set. 800 __ mov(tmp1, res); 801 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 802 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 803 #ifdef _LP64 804 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 805 __ testbool(tmp2); 806 #else 807 // On x86_32, C1 register allocator can give us the register without 8-bit support. 808 // Do the full-register access and test to avoid compilation failures. 809 __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1)); 810 __ testptr(tmp2, 0xFF); 811 #endif 812 __ jcc(Assembler::zero, *stub->continuation()); 813 814 __ bind(slow_path); 815 ce->store_parameter(res, 0); 816 ce->store_parameter(addr, 1); 817 __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); 818 819 __ jmp(*stub->continuation()); 820 } 821 822 #undef __ 823 824 #define __ sasm-> 825 826 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { 827 __ prologue("shenandoah_pre_barrier", false); 828 // arg0 : previous value of memory 829 830 __ push(rax); 831 __ push(rdx); 832 833 const Register pre_val = rax; 834 const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 835 const Register tmp = rdx; 836 837 NOT_LP64(__ get_thread(thread);) 838 839 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 840 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 841 842 Label done; 843 Label runtime; 844 845 // Is SATB still active? 846 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 847 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 848 __ jcc(Assembler::zero, done); 849 850 // Can we store original value in the thread's buffer? 851 852 __ movptr(tmp, queue_index); 853 __ testptr(tmp, tmp); 854 __ jcc(Assembler::zero, runtime); 855 __ subptr(tmp, wordSize); 856 __ movptr(queue_index, tmp); 857 __ addptr(tmp, buffer); 858 859 // prev_val (rax) 860 __ load_parameter(0, pre_val); 861 __ movptr(Address(tmp, 0), pre_val); 862 __ jmp(done); 863 864 __ bind(runtime); 865 866 __ save_live_registers_no_oop_map(true); 867 868 // load the pre-value 869 __ load_parameter(0, rcx); 870 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread); 871 872 __ restore_live_registers(true); 873 874 __ bind(done); 875 876 __ pop(rdx); 877 __ pop(rax); 878 879 __ epilogue(); 880 } 881 882 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { 883 __ prologue("shenandoah_load_reference_barrier", false); 884 // arg0 : object to be resolved 885 886 __ save_live_registers_no_oop_map(true); 887 888 #ifdef _LP64 889 __ load_parameter(0, c_rarg0); 890 __ load_parameter(1, c_rarg1); 891 if (UseCompressedOops) { 892 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1); 893 } else { 894 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1); 895 } 896 #else 897 __ load_parameter(0, rax); 898 __ load_parameter(1, rbx); 899 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx); 900 #endif 901 902 __ restore_live_registers_except_rax(true); 903 904 __ epilogue(); 905 } 906 907 #undef __ 908 909 #endif // COMPILER1 910 911 address ShenandoahBarrierSetAssembler::shenandoah_lrb() { 912 assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); 913 return _shenandoah_lrb; 914 } 915 916 #define __ cgen->assembler()-> 917 918 /* 919 * Incoming parameters: 920 * rax: oop 921 * rsi: load address 922 */ 923 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { 924 __ align(CodeEntryAlignment); 925 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); 926 address start = __ pc(); 927 928 Label resolve_oop, slow_path; 929 930 // We use RDI, which also serves as argument register for slow call. 931 // RAX always holds the src object ptr, except after the slow call, 932 // then it holds the result. R8/RBX is used as temporary register. 933 934 Register tmp1 = rdi; 935 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx); 936 937 __ push(tmp1); 938 __ push(tmp2); 939 940 // Check for object being in the collection set. 941 __ mov(tmp1, rax); 942 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 943 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 944 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 945 __ testbool(tmp2); 946 __ jccb(Assembler::notZero, resolve_oop); 947 __ pop(tmp2); 948 __ pop(tmp1); 949 __ ret(0); 950 951 // Test if object is already resolved. 952 __ bind(resolve_oop); 953 __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes())); 954 // Test if both lowest bits are set. We trick it by negating the bits 955 // then test for both bits clear. 956 __ notptr(tmp2); 957 __ testb(tmp2, markWord::marked_value); 958 __ jccb(Assembler::notZero, slow_path); 959 // Clear both lower bits. It's still inverted, so set them, and then invert back. 960 __ orptr(tmp2, markWord::marked_value); 961 __ notptr(tmp2); 962 // At this point, tmp2 contains the decoded forwarding pointer. 963 __ mov(rax, tmp2); 964 965 __ pop(tmp2); 966 __ pop(tmp1); 967 __ ret(0); 968 969 __ bind(slow_path); 970 971 __ push(rcx); 972 __ push(rdx); 973 __ push(rdi); 974 #ifdef _LP64 975 __ push(r8); 976 __ push(r9); 977 __ push(r10); 978 __ push(r11); 979 __ push(r12); 980 __ push(r13); 981 __ push(r14); 982 __ push(r15); 983 #endif 984 __ push(rbp); 985 __ movptr(rbp, rsp); 986 __ andptr(rsp, -StackAlignmentInBytes); 987 __ push_FPU_state(); 988 if (UseCompressedOops) { 989 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi); 990 } else { 991 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi); 992 } 993 __ pop_FPU_state(); 994 __ movptr(rsp, rbp); 995 __ pop(rbp); 996 #ifdef _LP64 997 __ pop(r15); 998 __ pop(r14); 999 __ pop(r13); 1000 __ pop(r12); 1001 __ pop(r11); 1002 __ pop(r10); 1003 __ pop(r9); 1004 __ pop(r8); 1005 #endif 1006 __ pop(rdi); 1007 __ pop(rdx); 1008 __ pop(rcx); 1009 1010 __ pop(tmp2); 1011 __ pop(tmp1); 1012 __ ret(0); 1013 1014 return start; 1015 } 1016 1017 #undef __ 1018 1019 void ShenandoahBarrierSetAssembler::barrier_stubs_init() { 1020 if (ShenandoahLoadRefBarrier) { 1021 int stub_code_size = 4096; 1022 ResourceMark rm; 1023 BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); 1024 CodeBuffer buf(bb); 1025 StubCodeGenerator cgen(&buf); 1026 _shenandoah_lrb = generate_shenandoah_lrb(&cgen); 1027 } 1028 }