1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 21 * 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 26 #include "gc/shenandoah/shenandoahForwarding.hpp" 27 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 28 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 29 #include "gc/shenandoah/shenandoahHeuristics.hpp" 30 #include "gc/shenandoah/shenandoahRuntime.hpp" 31 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 32 #include "interpreter/interpreter.hpp" 33 #include "interpreter/interp_masm.hpp" 34 #include "runtime/sharedRuntime.hpp" 35 #include "runtime/thread.hpp" 36 #include "utilities/macros.hpp" 37 #ifdef COMPILER1 38 #include "c1/c1_LIRAssembler.hpp" 39 #include "c1/c1_MacroAssembler.hpp" 40 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 41 #endif 42 43 #define __ masm-> 44 45 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 46 47 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 48 Register src, Register dst, Register count) { 49 50 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 51 52 if (is_reference_type(type)) { 53 54 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) { 55 #ifdef _LP64 56 Register thread = r15_thread; 57 #else 58 Register thread = rax; 59 if (thread == src || thread == dst || thread == count) { 60 thread = rbx; 61 } 62 if (thread == src || thread == dst || thread == count) { 63 thread = rcx; 64 } 65 if (thread == src || thread == dst || thread == count) { 66 thread = rdx; 67 } 68 __ push(thread); 69 __ get_thread(thread); 70 #endif 71 assert_different_registers(src, dst, count, thread); 72 73 Label done; 74 // Short-circuit if count == 0. 75 __ testptr(count, count); 76 __ jcc(Assembler::zero, done); 77 78 // Avoid runtime call when not marking. 79 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 80 int flags = ShenandoahHeap::HAS_FORWARDED; 81 if (!dest_uninitialized) { 82 flags |= ShenandoahHeap::MARKING; 83 } 84 __ testb(gc_state, flags); 85 __ jcc(Assembler::zero, done); 86 87 __ pusha(); // push registers 88 #ifdef _LP64 89 assert(src == rdi, "expected"); 90 assert(dst == rsi, "expected"); 91 assert(count == rdx, "expected"); 92 if (UseCompressedOops) { 93 if (dest_uninitialized) { 94 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count); 95 } else { 96 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count); 97 } 98 } else 99 #endif 100 { 101 if (dest_uninitialized) { 102 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count); 103 } else { 104 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count); 105 } 106 } 107 __ popa(); 108 __ bind(done); 109 NOT_LP64(__ pop(thread);) 110 } 111 } 112 113 } 114 115 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 116 Register obj, 117 Register pre_val, 118 Register thread, 119 Register tmp, 120 bool tosca_live, 121 bool expand_call) { 122 123 if (ShenandoahSATBBarrier) { 124 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 125 } 126 } 127 128 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 129 Register obj, 130 Register pre_val, 131 Register thread, 132 Register tmp, 133 bool tosca_live, 134 bool expand_call) { 135 // If expand_call is true then we expand the call_VM_leaf macro 136 // directly to skip generating the check by 137 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 138 139 #ifdef _LP64 140 assert(thread == r15_thread, "must be"); 141 #endif // _LP64 142 143 Label done; 144 Label runtime; 145 146 assert(pre_val != noreg, "check this code"); 147 148 if (obj != noreg) { 149 assert_different_registers(obj, pre_val, tmp); 150 assert(pre_val != rax, "check this code"); 151 } 152 153 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 154 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 155 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 156 157 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 158 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 159 __ jcc(Assembler::zero, done); 160 161 // Do we need to load the previous value? 162 if (obj != noreg) { 163 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 164 } 165 166 // Is the previous value null? 167 __ cmpptr(pre_val, (int32_t) NULL_WORD); 168 __ jcc(Assembler::equal, done); 169 170 // Can we store original value in the thread's buffer? 171 // Is index == 0? 172 // (The index field is typed as size_t.) 173 174 __ movptr(tmp, index); // tmp := *index_adr 175 __ cmpptr(tmp, 0); // tmp == 0? 176 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 177 178 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 179 __ movptr(index, tmp); // *index_adr := tmp 180 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 181 182 // Record the previous value 183 __ movptr(Address(tmp, 0), pre_val); 184 __ jmp(done); 185 186 __ bind(runtime); 187 // save the live input values 188 if(tosca_live) __ push(rax); 189 190 if (obj != noreg && obj != rax) 191 __ push(obj); 192 193 if (pre_val != rax) 194 __ push(pre_val); 195 196 // Calling the runtime using the regular call_VM_leaf mechanism generates 197 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 198 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 199 // 200 // If we care generating the pre-barrier without a frame (e.g. in the 201 // intrinsified Reference.get() routine) then ebp might be pointing to 202 // the caller frame and so this check will most likely fail at runtime. 203 // 204 // Expanding the call directly bypasses the generation of the check. 205 // So when we do not have have a full interpreter frame on the stack 206 // expand_call should be passed true. 207 208 NOT_LP64( __ push(thread); ) 209 210 #ifdef _LP64 211 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 212 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 213 // Note: this should not accidentally smash thread, because thread is always r15. 214 assert(thread != c_rarg0, "smashed arg"); 215 if (c_rarg0 != pre_val) { 216 __ mov(c_rarg0, pre_val); 217 } 218 #endif 219 220 if (expand_call) { 221 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 222 #ifdef _LP64 223 if (c_rarg1 != thread) { 224 __ mov(c_rarg1, thread); 225 } 226 // Already moved pre_val into c_rarg0 above 227 #else 228 __ push(thread); 229 __ push(pre_val); 230 #endif 231 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 232 } else { 233 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 234 } 235 236 NOT_LP64( __ pop(thread); ) 237 238 // save the live input values 239 if (pre_val != rax) 240 __ pop(pre_val); 241 242 if (obj != noreg && obj != rax) 243 __ pop(obj); 244 245 if(tosca_live) __ pop(rax); 246 247 __ bind(done); 248 } 249 250 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { 251 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 252 253 Label done; 254 255 #ifdef _LP64 256 Register thread = r15_thread; 257 #else 258 Register thread = rcx; 259 if (thread == dst) { 260 thread = rbx; 261 } 262 __ push(thread); 263 __ get_thread(thread); 264 #endif 265 266 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 267 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 268 __ jccb(Assembler::zero, done); 269 270 // Use rsi for src address 271 const Register src_addr = rsi; 272 // Setup address parameter first, if it does not clobber oop in dst 273 bool need_addr_setup = (src_addr != dst); 274 275 if (need_addr_setup) { 276 __ push(src_addr); 277 __ lea(src_addr, src); 278 279 if (dst != rax) { 280 // Move obj into rax and save rax 281 __ push(rax); 282 __ movptr(rax, dst); 283 } 284 } else { 285 // dst == rsi 286 __ push(rax); 287 __ movptr(rax, dst); 288 289 // we can clobber it, since it is outgoing register 290 __ lea(src_addr, src); 291 } 292 293 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 294 295 if (need_addr_setup) { 296 if (dst != rax) { 297 __ movptr(dst, rax); 298 __ pop(rax); 299 } 300 __ pop(src_addr); 301 } else { 302 __ movptr(dst, rax); 303 __ pop(rax); 304 } 305 306 __ bind(done); 307 308 #ifndef _LP64 309 __ pop(thread); 310 #endif 311 } 312 313 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { 314 if (!ShenandoahLoadRefBarrier) { 315 return; 316 } 317 318 Label done; 319 Label not_null; 320 Label slow_path; 321 __ block_comment("load_reference_barrier_native { "); 322 323 // null check 324 __ testptr(dst, dst); 325 __ jcc(Assembler::notZero, not_null); 326 __ jmp(done); 327 __ bind(not_null); 328 329 330 #ifdef _LP64 331 Register thread = r15_thread; 332 #else 333 Register thread = rcx; 334 if (thread == dst) { 335 thread = rbx; 336 } 337 __ push(thread); 338 __ get_thread(thread); 339 #endif 340 assert_different_registers(dst, thread); 341 342 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 343 __ testb(gc_state, ShenandoahHeap::EVACUATION); 344 #ifndef _LP64 345 __ pop(thread); 346 #endif 347 __ jccb(Assembler::notZero, slow_path); 348 __ jmp(done); 349 __ bind(slow_path); 350 351 if (dst != rax) { 352 __ push(rax); 353 } 354 __ push(rcx); 355 __ push(rdx); 356 __ push(rdi); 357 __ push(rsi); 358 #ifdef _LP64 359 __ push(r8); 360 __ push(r9); 361 __ push(r10); 362 __ push(r11); 363 __ push(r12); 364 __ push(r13); 365 __ push(r14); 366 __ push(r15); 367 #endif 368 369 assert_different_registers(dst, rsi); 370 __ lea(rsi, src); 371 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); 372 373 #ifdef _LP64 374 __ pop(r15); 375 __ pop(r14); 376 __ pop(r13); 377 __ pop(r12); 378 __ pop(r11); 379 __ pop(r10); 380 __ pop(r9); 381 __ pop(r8); 382 #endif 383 __ pop(rsi); 384 __ pop(rdi); 385 __ pop(rdx); 386 __ pop(rcx); 387 388 if (dst != rax) { 389 __ movptr(dst, rax); 390 __ pop(rax); 391 } 392 393 __ bind(done); 394 __ block_comment("load_reference_barrier_native { "); 395 } 396 397 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 398 if (ShenandoahStoreValEnqueueBarrier) { 399 storeval_barrier_impl(masm, dst, tmp); 400 } 401 } 402 403 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 404 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 405 406 if (dst == noreg) return; 407 408 if (ShenandoahStoreValEnqueueBarrier) { 409 // The set of registers to be saved+restored is the same as in the write-barrier above. 410 // Those are the commonly used registers in the interpreter. 411 __ pusha(); 412 // __ push_callee_saved_registers(); 413 __ subptr(rsp, 2 * Interpreter::stackElementSize); 414 __ movdbl(Address(rsp, 0), xmm0); 415 416 #ifdef _LP64 417 Register thread = r15_thread; 418 #else 419 Register thread = rcx; 420 if (thread == dst || thread == tmp) { 421 thread = rdi; 422 } 423 if (thread == dst || thread == tmp) { 424 thread = rbx; 425 } 426 __ get_thread(thread); 427 #endif 428 assert_different_registers(dst, tmp, thread); 429 430 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 431 __ movdbl(xmm0, Address(rsp, 0)); 432 __ addptr(rsp, 2 * Interpreter::stackElementSize); 433 //__ pop_callee_saved_registers(); 434 __ popa(); 435 } 436 } 437 438 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { 439 if (ShenandoahLoadRefBarrier) { 440 Label done; 441 __ testptr(dst, dst); 442 __ jcc(Assembler::zero, done); 443 load_reference_barrier_not_null(masm, dst, src); 444 __ bind(done); 445 } 446 } 447 448 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 449 Register dst, Address src, Register tmp1, Register tmp_thread) { 450 bool on_oop = is_reference_type(type); 451 bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; 452 bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; 453 bool not_in_heap = (decorators & IN_NATIVE) != 0; 454 bool on_reference = on_weak || on_phantom; 455 bool is_traversal_mode = ShenandoahHeap::heap()->is_traversal_mode(); 456 bool keep_alive = ((decorators & AS_NO_KEEPALIVE) == 0) || is_traversal_mode; 457 458 Register result_dst = dst; 459 bool use_tmp1_for_dst = false; 460 461 if (on_oop) { 462 // We want to preserve src 463 if (dst == src.base() || dst == src.index()) { 464 // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at() 465 if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) { 466 dst = tmp1; 467 use_tmp1_for_dst = true; 468 } else { 469 dst = rdi; 470 __ push(dst); 471 } 472 } 473 assert_different_registers(dst, src.base(), src.index()); 474 } 475 476 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 477 478 if (on_oop) { 479 if (not_in_heap && !is_traversal_mode) { 480 load_reference_barrier_native(masm, dst, src); 481 } else { 482 load_reference_barrier(masm, dst, src); 483 } 484 485 if (dst != result_dst) { 486 __ movptr(result_dst, dst); 487 488 if (!use_tmp1_for_dst) { 489 __ pop(dst); 490 } 491 492 dst = result_dst; 493 } 494 495 if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) { 496 __ push_IU_state(); 497 const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); 498 assert_different_registers(dst, tmp1, tmp_thread); 499 NOT_LP64(__ get_thread(thread)); 500 // Generate the SATB pre-barrier code to log the value of 501 // the referent field in an SATB buffer. 502 shenandoah_write_barrier_pre(masm /* masm */, 503 noreg /* obj */, 504 dst /* pre_val */, 505 thread /* thread */, 506 tmp1 /* tmp */, 507 true /* tosca_live */, 508 true /* expand_call */); 509 __ pop_IU_state(); 510 } 511 } 512 } 513 514 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 515 Address dst, Register val, Register tmp1, Register tmp2) { 516 517 bool on_oop = is_reference_type(type); 518 bool in_heap = (decorators & IN_HEAP) != 0; 519 bool as_normal = (decorators & AS_NORMAL) != 0; 520 if (on_oop && in_heap) { 521 bool needs_pre_barrier = as_normal; 522 523 Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi); 524 Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); 525 // flatten object address if needed 526 // We do it regardless of precise because we need the registers 527 if (dst.index() == noreg && dst.disp() == 0) { 528 if (dst.base() != tmp1) { 529 __ movptr(tmp1, dst.base()); 530 } 531 } else { 532 __ lea(tmp1, dst); 533 } 534 535 assert_different_registers(val, tmp1, tmp2, tmp3, rthread); 536 537 #ifndef _LP64 538 __ get_thread(rthread); 539 InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm); 540 imasm->save_bcp(); 541 #endif 542 543 if (needs_pre_barrier) { 544 shenandoah_write_barrier_pre(masm /*masm*/, 545 tmp1 /* obj */, 546 tmp2 /* pre_val */, 547 rthread /* thread */, 548 tmp3 /* tmp */, 549 val != noreg /* tosca_live */, 550 false /* expand_call */); 551 } 552 if (val == noreg) { 553 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 554 } else { 555 storeval_barrier(masm, val, tmp3); 556 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 557 } 558 NOT_LP64(imasm->restore_bcp()); 559 } else { 560 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); 561 } 562 } 563 564 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, 565 Register obj, Register tmp, Label& slowpath) { 566 Label done; 567 // Resolve jobject 568 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); 569 570 // Check for null. 571 __ testptr(obj, obj); 572 __ jcc(Assembler::zero, done); 573 574 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); 575 __ testb(gc_state, ShenandoahHeap::EVACUATION); 576 __ jccb(Assembler::notZero, slowpath); 577 __ bind(done); 578 } 579 580 // Special Shenandoah CAS implementation that handles false negatives 581 // due to concurrent evacuation. 582 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, 583 Register res, Address addr, Register oldval, Register newval, 584 bool exchange, Register tmp1, Register tmp2) { 585 assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); 586 assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); 587 assert_different_registers(oldval, newval, tmp1, tmp2); 588 589 Label L_success, L_failure; 590 591 // Remember oldval for retry logic below 592 #ifdef _LP64 593 if (UseCompressedOops) { 594 __ movl(tmp1, oldval); 595 } else 596 #endif 597 { 598 __ movptr(tmp1, oldval); 599 } 600 601 // Step 1. Fast-path. 602 // 603 // Try to CAS with given arguments. If successful, then we are done. 604 605 if (os::is_MP()) __ lock(); 606 #ifdef _LP64 607 if (UseCompressedOops) { 608 __ cmpxchgl(newval, addr); 609 } else 610 #endif 611 { 612 __ cmpxchgptr(newval, addr); 613 } 614 __ jcc(Assembler::equal, L_success); 615 616 // Step 2. CAS had failed. This may be a false negative. 617 // 618 // The trouble comes when we compare the to-space pointer with the from-space 619 // pointer to the same object. To resolve this, it will suffice to resolve 620 // the value from memory -- this will give both to-space pointers. 621 // If they mismatch, then it was a legitimate failure. 622 // 623 // Before reaching to resolve sequence, see if we can avoid the whole shebang 624 // with filters. 625 626 // Filter: when offending in-memory value is NULL, the failure is definitely legitimate 627 __ testptr(oldval, oldval); 628 __ jcc(Assembler::zero, L_failure); 629 630 // Filter: when heap is stable, the failure is definitely legitimate 631 #ifdef _LP64 632 const Register thread = r15_thread; 633 #else 634 const Register thread = tmp2; 635 __ get_thread(thread); 636 #endif 637 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 638 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 639 __ jcc(Assembler::zero, L_failure); 640 641 #ifdef _LP64 642 if (UseCompressedOops) { 643 __ movl(tmp2, oldval); 644 __ decode_heap_oop(tmp2); 645 } else 646 #endif 647 { 648 __ movptr(tmp2, oldval); 649 } 650 651 // Decode offending in-memory value. 652 // Test if-forwarded 653 __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value); 654 __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded 655 __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded 656 657 // Load and mask forwarding pointer 658 __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes())); 659 __ shrptr(tmp2, 2); 660 __ shlptr(tmp2, 2); 661 662 #ifdef _LP64 663 if (UseCompressedOops) { 664 __ decode_heap_oop(tmp1); // decode for comparison 665 } 666 #endif 667 668 // Now we have the forwarded offender in tmp2. 669 // Compare and if they don't match, we have legitimate failure 670 __ cmpptr(tmp1, tmp2); 671 __ jcc(Assembler::notEqual, L_failure); 672 673 // Step 3. Need to fix the memory ptr before continuing. 674 // 675 // At this point, we have from-space oldval in the register, and its to-space 676 // address is in tmp2. Let's try to update it into memory. We don't care if it 677 // succeeds or not. If it does, then the retrying CAS would see it and succeed. 678 // If this fixup fails, this means somebody else beat us to it, and necessarily 679 // with to-space ptr store. We still have to do the retry, because the GC might 680 // have updated the reference for us. 681 682 #ifdef _LP64 683 if (UseCompressedOops) { 684 __ encode_heap_oop(tmp2); // previously decoded at step 2. 685 } 686 #endif 687 688 if (os::is_MP()) __ lock(); 689 #ifdef _LP64 690 if (UseCompressedOops) { 691 __ cmpxchgl(tmp2, addr); 692 } else 693 #endif 694 { 695 __ cmpxchgptr(tmp2, addr); 696 } 697 698 // Step 4. Try to CAS again. 699 // 700 // This is guaranteed not to have false negatives, because oldval is definitely 701 // to-space, and memory pointer is to-space as well. Nothing is able to store 702 // from-space ptr into memory anymore. Make sure oldval is restored, after being 703 // garbled during retries. 704 // 705 #ifdef _LP64 706 if (UseCompressedOops) { 707 __ movl(oldval, tmp2); 708 } else 709 #endif 710 { 711 __ movptr(oldval, tmp2); 712 } 713 714 if (os::is_MP()) __ lock(); 715 #ifdef _LP64 716 if (UseCompressedOops) { 717 __ cmpxchgl(newval, addr); 718 } else 719 #endif 720 { 721 __ cmpxchgptr(newval, addr); 722 } 723 if (!exchange) { 724 __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump 725 } 726 727 // Step 5. If we need a boolean result out of CAS, set the flag appropriately. 728 // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS. 729 // Otherwise, failure witness for CAE is in oldval on all paths, and we can return. 730 731 if (exchange) { 732 __ bind(L_failure); 733 __ bind(L_success); 734 } else { 735 assert(res != NULL, "need result register"); 736 737 Label exit; 738 __ bind(L_failure); 739 __ xorptr(res, res); 740 __ jmpb(exit); 741 742 __ bind(L_success); 743 __ movptr(res, 1); 744 __ bind(exit); 745 } 746 } 747 748 #undef __ 749 750 #ifdef COMPILER1 751 752 #define __ ce->masm()-> 753 754 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { 755 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 756 // At this point we know that marking is in progress. 757 // If do_load() is true then we have to emit the 758 // load of the previous value; otherwise it has already 759 // been loaded into _pre_val. 760 761 __ bind(*stub->entry()); 762 assert(stub->pre_val()->is_register(), "Precondition."); 763 764 Register pre_val_reg = stub->pre_val()->as_register(); 765 766 if (stub->do_load()) { 767 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); 768 } 769 770 __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); 771 __ jcc(Assembler::equal, *stub->continuation()); 772 ce->store_parameter(stub->pre_val()->as_register(), 0); 773 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); 774 __ jmp(*stub->continuation()); 775 776 } 777 778 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { 779 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 780 __ bind(*stub->entry()); 781 782 Register obj = stub->obj()->as_register(); 783 Register res = stub->result()->as_register(); 784 Register addr = stub->addr()->as_register(); 785 Register tmp1 = stub->tmp1()->as_register(); 786 Register tmp2 = stub->tmp2()->as_register(); 787 assert_different_registers(obj, res, addr, tmp1, tmp2); 788 789 Label slow_path; 790 791 assert(res == rax, "result must arrive in rax"); 792 793 if (res != obj) { 794 __ mov(res, obj); 795 } 796 797 // Check for null. 798 __ testptr(res, res); 799 __ jcc(Assembler::zero, *stub->continuation()); 800 801 // Check for object being in the collection set. 802 __ mov(tmp1, res); 803 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 804 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 805 #ifdef _LP64 806 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 807 __ testbool(tmp2); 808 #else 809 // On x86_32, C1 register allocator can give us the register without 8-bit support. 810 // Do the full-register access and test to avoid compilation failures. 811 __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1)); 812 __ testptr(tmp2, 0xFF); 813 #endif 814 __ jcc(Assembler::zero, *stub->continuation()); 815 816 __ bind(slow_path); 817 ce->store_parameter(res, 0); 818 ce->store_parameter(addr, 1); 819 __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); 820 821 __ jmp(*stub->continuation()); 822 } 823 824 #undef __ 825 826 #define __ sasm-> 827 828 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { 829 __ prologue("shenandoah_pre_barrier", false); 830 // arg0 : previous value of memory 831 832 __ push(rax); 833 __ push(rdx); 834 835 const Register pre_val = rax; 836 const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 837 const Register tmp = rdx; 838 839 NOT_LP64(__ get_thread(thread);) 840 841 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 842 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 843 844 Label done; 845 Label runtime; 846 847 // Is SATB still active? 848 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 849 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 850 __ jcc(Assembler::zero, done); 851 852 // Can we store original value in the thread's buffer? 853 854 __ movptr(tmp, queue_index); 855 __ testptr(tmp, tmp); 856 __ jcc(Assembler::zero, runtime); 857 __ subptr(tmp, wordSize); 858 __ movptr(queue_index, tmp); 859 __ addptr(tmp, buffer); 860 861 // prev_val (rax) 862 __ load_parameter(0, pre_val); 863 __ movptr(Address(tmp, 0), pre_val); 864 __ jmp(done); 865 866 __ bind(runtime); 867 868 __ save_live_registers_no_oop_map(true); 869 870 // load the pre-value 871 __ load_parameter(0, rcx); 872 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread); 873 874 __ restore_live_registers(true); 875 876 __ bind(done); 877 878 __ pop(rdx); 879 __ pop(rax); 880 881 __ epilogue(); 882 } 883 884 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { 885 __ prologue("shenandoah_load_reference_barrier", false); 886 // arg0 : object to be resolved 887 888 __ save_live_registers_no_oop_map(true); 889 890 #ifdef _LP64 891 __ load_parameter(0, c_rarg0); 892 __ load_parameter(1, c_rarg1); 893 if (UseCompressedOops) { 894 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1); 895 } else { 896 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1); 897 } 898 #else 899 __ load_parameter(0, rax); 900 __ load_parameter(1, rbx); 901 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx); 902 #endif 903 904 __ restore_live_registers_except_rax(true); 905 906 __ epilogue(); 907 } 908 909 #undef __ 910 911 #endif // COMPILER1 912 913 address ShenandoahBarrierSetAssembler::shenandoah_lrb() { 914 assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); 915 return _shenandoah_lrb; 916 } 917 918 #define __ cgen->assembler()-> 919 920 /* 921 * Incoming parameters: 922 * rax: oop 923 * rsi: load address 924 */ 925 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { 926 __ align(CodeEntryAlignment); 927 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); 928 address start = __ pc(); 929 930 Label resolve_oop, slow_path; 931 932 // We use RDI, which also serves as argument register for slow call. 933 // RAX always holds the src object ptr, except after the slow call, 934 // then it holds the result. R8/RBX is used as temporary register. 935 936 Register tmp1 = rdi; 937 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx); 938 939 __ push(tmp1); 940 __ push(tmp2); 941 942 // Check for object being in the collection set. 943 __ mov(tmp1, rax); 944 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 945 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 946 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 947 __ testbool(tmp2); 948 __ jccb(Assembler::notZero, resolve_oop); 949 __ pop(tmp2); 950 __ pop(tmp1); 951 __ ret(0); 952 953 // Test if object is already resolved. 954 __ bind(resolve_oop); 955 __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes())); 956 // Test if both lowest bits are set. We trick it by negating the bits 957 // then test for both bits clear. 958 __ notptr(tmp2); 959 __ testb(tmp2, markWord::marked_value); 960 __ jccb(Assembler::notZero, slow_path); 961 // Clear both lower bits. It's still inverted, so set them, and then invert back. 962 __ orptr(tmp2, markWord::marked_value); 963 __ notptr(tmp2); 964 // At this point, tmp2 contains the decoded forwarding pointer. 965 __ mov(rax, tmp2); 966 967 __ pop(tmp2); 968 __ pop(tmp1); 969 __ ret(0); 970 971 __ bind(slow_path); 972 973 __ push(rcx); 974 __ push(rdx); 975 __ push(rdi); 976 #ifdef _LP64 977 __ push(r8); 978 __ push(r9); 979 __ push(r10); 980 __ push(r11); 981 __ push(r12); 982 __ push(r13); 983 __ push(r14); 984 __ push(r15); 985 #endif 986 __ push(rbp); 987 __ movptr(rbp, rsp); 988 __ andptr(rsp, -StackAlignmentInBytes); 989 __ push_FPU_state(); 990 if (UseCompressedOops) { 991 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi); 992 } else { 993 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi); 994 } 995 __ pop_FPU_state(); 996 __ movptr(rsp, rbp); 997 __ pop(rbp); 998 #ifdef _LP64 999 __ pop(r15); 1000 __ pop(r14); 1001 __ pop(r13); 1002 __ pop(r12); 1003 __ pop(r11); 1004 __ pop(r10); 1005 __ pop(r9); 1006 __ pop(r8); 1007 #endif 1008 __ pop(rdi); 1009 __ pop(rdx); 1010 __ pop(rcx); 1011 1012 __ pop(tmp2); 1013 __ pop(tmp1); 1014 __ ret(0); 1015 1016 return start; 1017 } 1018 1019 #undef __ 1020 1021 void ShenandoahBarrierSetAssembler::barrier_stubs_init() { 1022 if (ShenandoahLoadRefBarrier) { 1023 int stub_code_size = 4096; 1024 ResourceMark rm; 1025 BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); 1026 CodeBuffer buf(bb); 1027 StubCodeGenerator cgen(&buf); 1028 _shenandoah_lrb = generate_shenandoah_lrb(&cgen); 1029 } 1030 }