/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (is_reference_type(type)) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
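      // The barrier is only needed while a GC phase that cares about these
      // stores is running: from-space fixups while forwarded objects may
      // exist, and the SATB pre-barrier while marking (skipped when the
      // destination is uninitialized and therefore holds no previous values).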
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags = ShenandoahHeap::HAS_FORWARDED;
      if (!dest_uninitialized) {
        flags |= ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha(); // push registers
#ifdef _LP64
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
        }
      } else
#endif
      {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
        }
      }
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
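  //
  // The index counts down in bytes from the buffer capacity towards zero;
  // a zero index means the buffer is full and must be handed over to the
  // runtime before another entry can be enqueued.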
  __ movptr(tmp, index);             // tmp := *index_adr
  __ cmpptr(tmp, 0);                 // tmp == 0?
  __ jcc(Assembler::equal, runtime); // If yes, goto runtime

  __ subptr(tmp, wordSize);          // tmp := tmp - wordSize
  __ movptr(index, tmp);             // *index_adr := tmp
  __ addptr(tmp, buffer);            // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack,
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy the thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  // Use rsi for src address
  const Register src_addr = rsi;
  // Set up the address parameter first, if it does not clobber the oop in dst
  bool need_addr_setup = (src_addr != dst);

  if (need_addr_setup) {
    __ push(src_addr);
    __ lea(src_addr, src);

    if (dst != rax) {
      // Move obj into rax and save rax
      __ push(rax);
      __ movptr(rax, dst);
    }
  } else {
    // dst == rsi
    __ push(rax);
    __ movptr(rax, dst);

    // we can clobber it, since it is an outgoing register
    __ lea(src_addr, src);
  }

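  // The LRB stub (generate_shenandoah_lrb below) takes the oop in rax and
  // the load address in rsi, and returns the canonical, possibly evacuated,
  // oop back in rax.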
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (need_addr_setup) {
    if (dst != rax) {
      __ movptr(dst, rax);
      __ pop(rax);
    }
    __ pop(src_addr);
  } else {
    __ movptr(dst, rax);
    __ pop(rax);
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  Label done;
  Label not_null;
  Label slow_path;
  __ block_comment("load_reference_barrier_native { ");

  // null check
  __ testptr(dst, dst);
  __ jcc(Assembler::notZero, not_null);
  __ jmp(done);
  __ bind(not_null);

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
#ifndef _LP64
  __ pop(thread);
#endif
  __ jccb(Assembler::notZero, slow_path);
  __ jmp(done);
  __ bind(slow_path);

  if (dst != rax) {
    __ push(rax);
  }
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  assert_different_registers(dst, rsi);
  __ lea(rsi, src);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi);

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  if (dst != rax) {
    __ movptr(dst, rax);
    __ pop(rax);
  }

  __ bind(done);
  __ block_comment("} load_reference_barrier_native");
}

#ifdef _LP64
void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  // Use the default version
  BarrierSetAssembler::c2i_entry_barrier(masm);
}
#else
void ShenandoahBarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == NULL) {
    return;
  }

  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = rax;
  Register tmp2 = rcx;

  __ push(tmp1);
  __ push(tmp2);

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
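  // The holder of a weak CLD is reachable only through a weak handle; if
  // that handle still resolves to a non-null object, the class loader, and
  // hence the method, is still alive.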
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);
  __ pop(tmp2);
  __ pop(tmp1);

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);
  __ pop(tmp2);
  __ pop(tmp1);
}
#endif

void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

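    // Reuse the SATB pre-barrier machinery with pre_val == dst: the value
    // being stored is enqueued for concurrent marking as if it were a
    // previous value, which is what the storeval/enqueue barrier needs.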
    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst, src);
    __ bind(done);
  }
}

//
// Arguments:
//
// Inputs:
//   src:        oop location, might be clobbered
//   tmp1:       scratch register, might not be valid.
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   tmp1 (if it is valid)
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierSetAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
      load_reference_barrier_native(masm, dst, src);
    } else {
      load_reference_barrier(masm, dst, src);
    }

    // Move the loaded oop to the final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ push_IU_state();
    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
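    // This keep-alive step makes a referent read through Reference.get()
    // visible to the concurrent marker; without it the referent might never
    // be marked even though the caller now holds a strong reference to it.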
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);
    __ pop_IU_state();
  }
}

void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with the given arguments. If successful, then we are done.

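  // x86 cmpxchg implicitly compares rax with the memory operand and, on
  // failure, loads the current memory value into rax. That is why oldval
  // is pinned to rax, and why the failure paths below can inspect the
  // witnessed value directly in oldval.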
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS has failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching for the resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when the offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when the heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode the offending in-memory value.
  // Test if forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);     // When it is 00, then also not forwarded

  // Load and mask the forwarding pointer
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare, and if they don't match, we have a legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have the from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with a to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at Step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and the memory pointer is to-space as well. Nothing is able to store
  // a from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
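  // oldval (rax) now holds whatever the failed CAS attempts witnessed in
  // memory; reload it with the to-space pointer computed in Step 2 so the
  // retry compares against the canonical value.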
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately,
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, the failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());
}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

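  // The in-cset test below is a byte-map lookup: shifting the object
  // address right by the region size shift yields the region index, and a
  // non-zero byte at that index means the region is in the collection set.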
  // Check for object being in the collection set.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, the C1 register allocator can give us a register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store the original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

/*
 * Incoming parameters:
 * rax: oop
 * rsi: load address
 */
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as the argument register for the slow call.
  // RAX always holds the src object ptr, except after the slow call, when
  // it holds the result. R8/RBX is used as a temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if the object is already resolved.
  __ bind(resolve_oop);
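  // A forwarded object has both low mark-word bits set (markWord::marked_value);
  // the remaining bits are then the forwarding pointer. The inverted-bit
  // tricks below test and strip those two bits without extra registers.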
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // and then testing for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
  }
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}