1 /* 2 * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "gc/shenandoah/shenandoahBarrierSet.hpp" 27 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 28 #include "gc/shenandoah/shenandoahForwarding.hpp" 29 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 30 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 31 #include "gc/shenandoah/shenandoahHeuristics.hpp" 32 #include "gc/shenandoah/shenandoahRuntime.hpp" 33 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "interpreter/interp_masm.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/thread.hpp" 38 #include "utilities/macros.hpp" 39 #ifdef COMPILER1 40 #include "c1/c1_LIRAssembler.hpp" 41 #include "c1/c1_MacroAssembler.hpp" 42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 43 #endif 44 45 #define __ masm-> 46 47 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 48 49 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 50 Register src, Register dst, Register count) { 51 52 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 53 54 if (is_reference_type(type)) { 55 56 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) { 57 #ifdef _LP64 58 Register thread = r15_thread; 59 #else 60 Register thread = rax; 61 if (thread == src || thread == dst || thread == count) { 62 thread = rbx; 63 } 64 if (thread == src || thread == dst || thread == count) { 65 thread = rcx; 66 } 67 if (thread == src || thread == dst || thread == count) { 68 thread = rdx; 69 } 70 __ push(thread); 71 __ get_thread(thread); 72 #endif 73 assert_different_registers(src, dst, count, thread); 74 75 Label done; 76 // Short-circuit if count == 0. 77 __ testptr(count, count); 78 __ jcc(Assembler::zero, done); 79 80 // Avoid runtime call when not marking. 81 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 82 int flags = ShenandoahHeap::HAS_FORWARDED; 83 if (!dest_uninitialized) { 84 flags |= ShenandoahHeap::MARKING; 85 } 86 __ testb(gc_state, flags); 87 __ jcc(Assembler::zero, done); 88 89 __ pusha(); // push registers 90 #ifdef _LP64 91 assert(src == rdi, "expected"); 92 assert(dst == rsi, "expected"); 93 assert(count == rdx, "expected"); 94 if (UseCompressedOops) { 95 if (dest_uninitialized) { 96 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count); 97 } else { 98 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count); 99 } 100 } else 101 #endif 102 { 103 if (dest_uninitialized) { 104 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count); 105 } else { 106 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count); 107 } 108 } 109 __ popa(); 110 __ bind(done); 111 NOT_LP64(__ pop(thread);) 112 } 113 } 114 115 } 116 117 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 118 Register obj, 119 Register pre_val, 120 Register thread, 121 Register tmp, 122 bool tosca_live, 123 bool expand_call) { 124 125 if (ShenandoahSATBBarrier) { 126 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 127 } 128 } 129 130 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 131 Register obj, 132 Register pre_val, 133 Register thread, 134 Register tmp, 135 bool tosca_live, 136 bool expand_call) { 137 // If expand_call is true then we expand the call_VM_leaf macro 138 // directly to skip generating the check by 139 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 140 141 #ifdef _LP64 142 assert(thread == r15_thread, "must be"); 143 #endif // _LP64 144 145 Label done; 146 Label runtime; 147 148 assert(pre_val != noreg, "check this code"); 149 150 if (obj != noreg) { 151 assert_different_registers(obj, pre_val, tmp); 152 assert(pre_val != rax, "check this code"); 153 } 154 155 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 156 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 157 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 158 159 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 160 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 161 __ jcc(Assembler::zero, done); 162 163 // Do we need to load the previous value? 164 if (obj != noreg) { 165 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 166 } 167 168 // Is the previous value null? 169 __ cmpptr(pre_val, (int32_t) NULL_WORD); 170 __ jcc(Assembler::equal, done); 171 172 // Can we store original value in the thread's buffer? 173 // Is index == 0? 174 // (The index field is typed as size_t.) 175 176 __ movptr(tmp, index); // tmp := *index_adr 177 __ cmpptr(tmp, 0); // tmp == 0? 178 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 179 180 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 181 __ movptr(index, tmp); // *index_adr := tmp 182 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 183 184 // Record the previous value 185 __ movptr(Address(tmp, 0), pre_val); 186 __ jmp(done); 187 188 __ bind(runtime); 189 // save the live input values 190 if(tosca_live) __ push(rax); 191 192 if (obj != noreg && obj != rax) 193 __ push(obj); 194 195 if (pre_val != rax) 196 __ push(pre_val); 197 198 // Calling the runtime using the regular call_VM_leaf mechanism generates 199 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 200 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 201 // 202 // If we care generating the pre-barrier without a frame (e.g. in the 203 // intrinsified Reference.get() routine) then ebp might be pointing to 204 // the caller frame and so this check will most likely fail at runtime. 205 // 206 // Expanding the call directly bypasses the generation of the check. 207 // So when we do not have have a full interpreter frame on the stack 208 // expand_call should be passed true. 209 210 NOT_LP64( __ push(thread); ) 211 212 #ifdef _LP64 213 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 214 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 215 // Note: this should not accidentally smash thread, because thread is always r15. 216 assert(thread != c_rarg0, "smashed arg"); 217 if (c_rarg0 != pre_val) { 218 __ mov(c_rarg0, pre_val); 219 } 220 #endif 221 222 if (expand_call) { 223 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 224 #ifdef _LP64 225 if (c_rarg1 != thread) { 226 __ mov(c_rarg1, thread); 227 } 228 // Already moved pre_val into c_rarg0 above 229 #else 230 __ push(thread); 231 __ push(pre_val); 232 #endif 233 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 234 } else { 235 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 236 } 237 238 NOT_LP64( __ pop(thread); ) 239 240 // save the live input values 241 if (pre_val != rax) 242 __ pop(pre_val); 243 244 if (obj != noreg && obj != rax) 245 __ pop(obj); 246 247 if(tosca_live) __ pop(rax); 248 249 __ bind(done); 250 } 251 252 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { 253 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 254 255 Label done; 256 257 #ifdef _LP64 258 Register thread = r15_thread; 259 #else 260 Register thread = rcx; 261 if (thread == dst) { 262 thread = rbx; 263 } 264 __ push(thread); 265 __ get_thread(thread); 266 #endif 267 268 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 269 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 270 __ jccb(Assembler::zero, done); 271 272 // Use rsi for src address 273 const Register src_addr = rsi; 274 // Setup address parameter first, if it does not clobber oop in dst 275 bool need_addr_setup = (src_addr != dst); 276 277 if (need_addr_setup) { 278 __ push(src_addr); 279 __ lea(src_addr, src); 280 281 if (dst != rax) { 282 // Move obj into rax and save rax 283 __ push(rax); 284 __ movptr(rax, dst); 285 } 286 } else { 287 // dst == rsi 288 __ push(rax); 289 __ movptr(rax, dst); 290 291 // we can clobber it, since it is outgoing register 292 __ lea(src_addr, src); 293 } 294 295 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 296 297 if (need_addr_setup) { 298 if (dst != rax) { 299 __ movptr(dst, rax); 300 __ pop(rax); 301 } 302 __ pop(src_addr); 303 } else { 304 __ movptr(dst, rax); 305 __ pop(rax); 306 } 307 308 __ bind(done); 309 310 #ifndef _LP64 311 __ pop(thread); 312 #endif 313 } 314 315 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { 316 if (!ShenandoahLoadRefBarrier) { 317 return; 318 } 319 320 Label done; 321 Label not_null; 322 Label slow_path; 323 __ block_comment("load_reference_barrier_native { "); 324 325 // null check 326 __ testptr(dst, dst); 327 __ jcc(Assembler::notZero, not_null); 328 __ jmp(done); 329 __ bind(not_null); 330 331 332 #ifdef _LP64 333 Register thread = r15_thread; 334 #else 335 Register thread = rcx; 336 if (thread == dst) { 337 thread = rbx; 338 } 339 __ push(thread); 340 __ get_thread(thread); 341 #endif 342 assert_different_registers(dst, thread); 343 344 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 345 __ testb(gc_state, ShenandoahHeap::EVACUATION); 346 #ifndef _LP64 347 __ pop(thread); 348 #endif 349 __ jccb(Assembler::notZero, slow_path); 350 __ jmp(done); 351 __ bind(slow_path); 352 353 if (dst != rax) { 354 __ push(rax); 355 } 356 __ push(rcx); 357 __ push(rdx); 358 __ push(rdi); 359 __ push(rsi); 360 #ifdef _LP64 361 __ push(r8); 362 __ push(r9); 363 __ push(r10); 364 __ push(r11); 365 __ push(r12); 366 __ push(r13); 367 __ push(r14); 368 __ push(r15); 369 #endif 370 371 assert_different_registers(dst, rsi); 372 __ lea(rsi, src); 373 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); 374 375 #ifdef _LP64 376 __ pop(r15); 377 __ pop(r14); 378 __ pop(r13); 379 __ pop(r12); 380 __ pop(r11); 381 __ pop(r10); 382 __ pop(r9); 383 __ pop(r8); 384 #endif 385 __ pop(rsi); 386 __ pop(rdi); 387 __ pop(rdx); 388 __ pop(rcx); 389 390 if (dst != rax) { 391 __ movptr(dst, rax); 392 __ pop(rax); 393 } 394 395 __ bind(done); 396 __ block_comment("load_reference_barrier_native { "); 397 } 398 399 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 400 if (ShenandoahStoreValEnqueueBarrier) { 401 storeval_barrier_impl(masm, dst, tmp); 402 } 403 } 404 405 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 406 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 407 408 if (dst == noreg) return; 409 410 if (ShenandoahStoreValEnqueueBarrier) { 411 // The set of registers to be saved+restored is the same as in the write-barrier above. 412 // Those are the commonly used registers in the interpreter. 413 __ pusha(); 414 // __ push_callee_saved_registers(); 415 __ subptr(rsp, 2 * Interpreter::stackElementSize); 416 __ movdbl(Address(rsp, 0), xmm0); 417 418 #ifdef _LP64 419 Register thread = r15_thread; 420 #else 421 Register thread = rcx; 422 if (thread == dst || thread == tmp) { 423 thread = rdi; 424 } 425 if (thread == dst || thread == tmp) { 426 thread = rbx; 427 } 428 __ get_thread(thread); 429 #endif 430 assert_different_registers(dst, tmp, thread); 431 432 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 433 __ movdbl(xmm0, Address(rsp, 0)); 434 __ addptr(rsp, 2 * Interpreter::stackElementSize); 435 //__ pop_callee_saved_registers(); 436 __ popa(); 437 } 438 } 439 440 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { 441 if (ShenandoahLoadRefBarrier) { 442 Label done; 443 __ testptr(dst, dst); 444 __ jcc(Assembler::zero, done); 445 load_reference_barrier_not_null(masm, dst, src); 446 __ bind(done); 447 } 448 } 449 450 // 451 // Arguments: 452 // 453 // Inputs: 454 // src: oop location, might be clobbered 455 // tmp1: scratch register, might not be valid. 456 // 457 // Output: 458 // dst: oop loaded from src location 459 // 460 // Kill: 461 // tmp1 (if it is valid) 462 // 463 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 464 Register dst, Address src, Register tmp1, Register tmp_thread) { 465 // 1: non-reference load, no additional barrier is needed 466 if (!is_reference_type(type)) { 467 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 468 return; 469 } 470 471 assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected"); 472 473 // 2: load a reference from src location and apply LRB if needed 474 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { 475 Register result_dst = dst; 476 bool use_tmp1_for_dst = false; 477 478 // Preserve src location for LRB 479 if (dst == src.base() || dst == src.index()) { 480 // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at() 481 if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) { 482 dst = tmp1; 483 use_tmp1_for_dst = true; 484 } else { 485 dst = rdi; 486 __ push(dst); 487 } 488 assert_different_registers(dst, src.base(), src.index()); 489 } 490 491 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 492 493 if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) { 494 load_reference_barrier_native(masm, dst, src); 495 } else { 496 load_reference_barrier(masm, dst, src); 497 } 498 499 // Move loaded oop to final destination 500 if (dst != result_dst) { 501 __ movptr(result_dst, dst); 502 503 if (!use_tmp1_for_dst) { 504 __ pop(dst); 505 } 506 507 dst = result_dst; 508 } 509 } else { 510 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 511 } 512 513 // 3: apply keep-alive barrier if needed 514 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { 515 __ push_IU_state(); 516 // That path can be reached from the c2i adapter with live fp 517 // arguments in registers. 518 LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call")); 519 __ subptr(rsp, 64); 520 __ movdbl(Address(rsp, 0), xmm0); 521 __ movdbl(Address(rsp, 8), xmm1); 522 __ movdbl(Address(rsp, 16), xmm2); 523 __ movdbl(Address(rsp, 24), xmm3); 524 __ movdbl(Address(rsp, 32), xmm4); 525 __ movdbl(Address(rsp, 40), xmm5); 526 __ movdbl(Address(rsp, 48), xmm6); 527 __ movdbl(Address(rsp, 56), xmm7); 528 529 Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); 530 assert_different_registers(dst, tmp1, tmp_thread); 531 if (!thread->is_valid()) { 532 thread = rdx; 533 } 534 NOT_LP64(__ get_thread(thread)); 535 // Generate the SATB pre-barrier code to log the value of 536 // the referent field in an SATB buffer. 537 shenandoah_write_barrier_pre(masm /* masm */, 538 noreg /* obj */, 539 dst /* pre_val */, 540 thread /* thread */, 541 tmp1 /* tmp */, 542 true /* tosca_live */, 543 true /* expand_call */); 544 __ movdbl(xmm0, Address(rsp, 0)); 545 __ movdbl(xmm1, Address(rsp, 8)); 546 __ movdbl(xmm2, Address(rsp, 16)); 547 __ movdbl(xmm3, Address(rsp, 24)); 548 __ movdbl(xmm4, Address(rsp, 32)); 549 __ movdbl(xmm5, Address(rsp, 40)); 550 __ movdbl(xmm6, Address(rsp, 48)); 551 __ movdbl(xmm7, Address(rsp, 56)); 552 __ addptr(rsp, 64); 553 __ pop_IU_state(); 554 } 555 } 556 557 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 558 Address dst, Register val, Register tmp1, Register tmp2) { 559 560 bool on_oop = is_reference_type(type); 561 bool in_heap = (decorators & IN_HEAP) != 0; 562 bool as_normal = (decorators & AS_NORMAL) != 0; 563 if (on_oop && in_heap) { 564 bool needs_pre_barrier = as_normal; 565 566 Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi); 567 Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); 568 // flatten object address if needed 569 // We do it regardless of precise because we need the registers 570 if (dst.index() == noreg && dst.disp() == 0) { 571 if (dst.base() != tmp1) { 572 __ movptr(tmp1, dst.base()); 573 } 574 } else { 575 __ lea(tmp1, dst); 576 } 577 578 assert_different_registers(val, tmp1, tmp2, tmp3, rthread); 579 580 #ifndef _LP64 581 __ get_thread(rthread); 582 InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm); 583 imasm->save_bcp(); 584 #endif 585 586 if (needs_pre_barrier) { 587 shenandoah_write_barrier_pre(masm /*masm*/, 588 tmp1 /* obj */, 589 tmp2 /* pre_val */, 590 rthread /* thread */, 591 tmp3 /* tmp */, 592 val != noreg /* tosca_live */, 593 false /* expand_call */); 594 } 595 if (val == noreg) { 596 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 597 } else { 598 storeval_barrier(masm, val, tmp3); 599 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 600 } 601 NOT_LP64(imasm->restore_bcp()); 602 } else { 603 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); 604 } 605 } 606 607 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, 608 Register obj, Register tmp, Label& slowpath) { 609 Label done; 610 // Resolve jobject 611 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); 612 613 // Check for null. 614 __ testptr(obj, obj); 615 __ jcc(Assembler::zero, done); 616 617 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); 618 __ testb(gc_state, ShenandoahHeap::EVACUATION); 619 __ jccb(Assembler::notZero, slowpath); 620 __ bind(done); 621 } 622 623 // Special Shenandoah CAS implementation that handles false negatives 624 // due to concurrent evacuation. 625 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, 626 Register res, Address addr, Register oldval, Register newval, 627 bool exchange, Register tmp1, Register tmp2) { 628 assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); 629 assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); 630 assert_different_registers(oldval, newval, tmp1, tmp2); 631 632 Label L_success, L_failure; 633 634 // Remember oldval for retry logic below 635 #ifdef _LP64 636 if (UseCompressedOops) { 637 __ movl(tmp1, oldval); 638 } else 639 #endif 640 { 641 __ movptr(tmp1, oldval); 642 } 643 644 // Step 1. Fast-path. 645 // 646 // Try to CAS with given arguments. If successful, then we are done. 647 648 if (os::is_MP()) __ lock(); 649 #ifdef _LP64 650 if (UseCompressedOops) { 651 __ cmpxchgl(newval, addr); 652 } else 653 #endif 654 { 655 __ cmpxchgptr(newval, addr); 656 } 657 __ jcc(Assembler::equal, L_success); 658 659 // Step 2. CAS had failed. This may be a false negative. 660 // 661 // The trouble comes when we compare the to-space pointer with the from-space 662 // pointer to the same object. To resolve this, it will suffice to resolve 663 // the value from memory -- this will give both to-space pointers. 664 // If they mismatch, then it was a legitimate failure. 665 // 666 // Before reaching to resolve sequence, see if we can avoid the whole shebang 667 // with filters. 668 669 // Filter: when offending in-memory value is NULL, the failure is definitely legitimate 670 __ testptr(oldval, oldval); 671 __ jcc(Assembler::zero, L_failure); 672 673 // Filter: when heap is stable, the failure is definitely legitimate 674 #ifdef _LP64 675 const Register thread = r15_thread; 676 #else 677 const Register thread = tmp2; 678 __ get_thread(thread); 679 #endif 680 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 681 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 682 __ jcc(Assembler::zero, L_failure); 683 684 #ifdef _LP64 685 if (UseCompressedOops) { 686 __ movl(tmp2, oldval); 687 __ decode_heap_oop(tmp2); 688 } else 689 #endif 690 { 691 __ movptr(tmp2, oldval); 692 } 693 694 // Decode offending in-memory value. 695 // Test if-forwarded 696 __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value); 697 __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded 698 __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded 699 700 // Load and mask forwarding pointer 701 __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes())); 702 __ shrptr(tmp2, 2); 703 __ shlptr(tmp2, 2); 704 705 #ifdef _LP64 706 if (UseCompressedOops) { 707 __ decode_heap_oop(tmp1); // decode for comparison 708 } 709 #endif 710 711 // Now we have the forwarded offender in tmp2. 712 // Compare and if they don't match, we have legitimate failure 713 __ cmpptr(tmp1, tmp2); 714 __ jcc(Assembler::notEqual, L_failure); 715 716 // Step 3. Need to fix the memory ptr before continuing. 717 // 718 // At this point, we have from-space oldval in the register, and its to-space 719 // address is in tmp2. Let's try to update it into memory. We don't care if it 720 // succeeds or not. If it does, then the retrying CAS would see it and succeed. 721 // If this fixup fails, this means somebody else beat us to it, and necessarily 722 // with to-space ptr store. We still have to do the retry, because the GC might 723 // have updated the reference for us. 724 725 #ifdef _LP64 726 if (UseCompressedOops) { 727 __ encode_heap_oop(tmp2); // previously decoded at step 2. 728 } 729 #endif 730 731 if (os::is_MP()) __ lock(); 732 #ifdef _LP64 733 if (UseCompressedOops) { 734 __ cmpxchgl(tmp2, addr); 735 } else 736 #endif 737 { 738 __ cmpxchgptr(tmp2, addr); 739 } 740 741 // Step 4. Try to CAS again. 742 // 743 // This is guaranteed not to have false negatives, because oldval is definitely 744 // to-space, and memory pointer is to-space as well. Nothing is able to store 745 // from-space ptr into memory anymore. Make sure oldval is restored, after being 746 // garbled during retries. 747 // 748 #ifdef _LP64 749 if (UseCompressedOops) { 750 __ movl(oldval, tmp2); 751 } else 752 #endif 753 { 754 __ movptr(oldval, tmp2); 755 } 756 757 if (os::is_MP()) __ lock(); 758 #ifdef _LP64 759 if (UseCompressedOops) { 760 __ cmpxchgl(newval, addr); 761 } else 762 #endif 763 { 764 __ cmpxchgptr(newval, addr); 765 } 766 if (!exchange) { 767 __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump 768 } 769 770 // Step 5. If we need a boolean result out of CAS, set the flag appropriately. 771 // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS. 772 // Otherwise, failure witness for CAE is in oldval on all paths, and we can return. 773 774 if (exchange) { 775 __ bind(L_failure); 776 __ bind(L_success); 777 } else { 778 assert(res != NULL, "need result register"); 779 780 Label exit; 781 __ bind(L_failure); 782 __ xorptr(res, res); 783 __ jmpb(exit); 784 785 __ bind(L_success); 786 __ movptr(res, 1); 787 __ bind(exit); 788 } 789 } 790 791 #undef __ 792 793 #ifdef COMPILER1 794 795 #define __ ce->masm()-> 796 797 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { 798 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 799 // At this point we know that marking is in progress. 800 // If do_load() is true then we have to emit the 801 // load of the previous value; otherwise it has already 802 // been loaded into _pre_val. 803 804 __ bind(*stub->entry()); 805 assert(stub->pre_val()->is_register(), "Precondition."); 806 807 Register pre_val_reg = stub->pre_val()->as_register(); 808 809 if (stub->do_load()) { 810 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); 811 } 812 813 __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); 814 __ jcc(Assembler::equal, *stub->continuation()); 815 ce->store_parameter(stub->pre_val()->as_register(), 0); 816 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); 817 __ jmp(*stub->continuation()); 818 819 } 820 821 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { 822 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 823 __ bind(*stub->entry()); 824 825 Register obj = stub->obj()->as_register(); 826 Register res = stub->result()->as_register(); 827 Register addr = stub->addr()->as_pointer_register(); 828 Register tmp1 = stub->tmp1()->as_register(); 829 Register tmp2 = stub->tmp2()->as_register(); 830 assert_different_registers(obj, res, addr, tmp1, tmp2); 831 832 Label slow_path; 833 834 assert(res == rax, "result must arrive in rax"); 835 836 if (res != obj) { 837 __ mov(res, obj); 838 } 839 840 // Check for null. 841 __ testptr(res, res); 842 __ jcc(Assembler::zero, *stub->continuation()); 843 844 // Check for object being in the collection set. 845 __ mov(tmp1, res); 846 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 847 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 848 #ifdef _LP64 849 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 850 __ testbool(tmp2); 851 #else 852 // On x86_32, C1 register allocator can give us the register without 8-bit support. 853 // Do the full-register access and test to avoid compilation failures. 854 __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1)); 855 __ testptr(tmp2, 0xFF); 856 #endif 857 __ jcc(Assembler::zero, *stub->continuation()); 858 859 __ bind(slow_path); 860 ce->store_parameter(res, 0); 861 ce->store_parameter(addr, 1); 862 if (stub->is_native()) { 863 __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin())); 864 } else { 865 __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); 866 } 867 __ jmp(*stub->continuation()); 868 } 869 870 #undef __ 871 872 #define __ sasm-> 873 874 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { 875 __ prologue("shenandoah_pre_barrier", false); 876 // arg0 : previous value of memory 877 878 __ push(rax); 879 __ push(rdx); 880 881 const Register pre_val = rax; 882 const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 883 const Register tmp = rdx; 884 885 NOT_LP64(__ get_thread(thread);) 886 887 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 888 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 889 890 Label done; 891 Label runtime; 892 893 // Is SATB still active? 894 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 895 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 896 __ jcc(Assembler::zero, done); 897 898 // Can we store original value in the thread's buffer? 899 900 __ movptr(tmp, queue_index); 901 __ testptr(tmp, tmp); 902 __ jcc(Assembler::zero, runtime); 903 __ subptr(tmp, wordSize); 904 __ movptr(queue_index, tmp); 905 __ addptr(tmp, buffer); 906 907 // prev_val (rax) 908 __ load_parameter(0, pre_val); 909 __ movptr(Address(tmp, 0), pre_val); 910 __ jmp(done); 911 912 __ bind(runtime); 913 914 __ save_live_registers_no_oop_map(true); 915 916 // load the pre-value 917 __ load_parameter(0, rcx); 918 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread); 919 920 __ restore_live_registers(true); 921 922 __ bind(done); 923 924 __ pop(rdx); 925 __ pop(rax); 926 927 __ epilogue(); 928 } 929 930 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool native) { 931 __ prologue("shenandoah_load_reference_barrier", false); 932 // arg0 : object to be resolved 933 934 __ save_live_registers_no_oop_map(true); 935 936 #ifdef _LP64 937 __ load_parameter(0, c_rarg0); 938 __ load_parameter(1, c_rarg1); 939 if (native) { 940 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), c_rarg0, c_rarg1); 941 } else if (UseCompressedOops) { 942 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1); 943 } else { 944 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1); 945 } 946 #else 947 __ load_parameter(0, rax); 948 __ load_parameter(1, rbx); 949 if (native) { 950 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rax, rbx); 951 } else { 952 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx); 953 } 954 #endif 955 956 __ restore_live_registers_except_rax(true); 957 958 __ epilogue(); 959 } 960 961 #undef __ 962 963 #endif // COMPILER1 964 965 address ShenandoahBarrierSetAssembler::shenandoah_lrb() { 966 assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); 967 return _shenandoah_lrb; 968 } 969 970 #define __ cgen->assembler()-> 971 972 /* 973 * Incoming parameters: 974 * rax: oop 975 * rsi: load address 976 */ 977 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { 978 __ align(CodeEntryAlignment); 979 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); 980 address start = __ pc(); 981 982 Label resolve_oop, slow_path; 983 984 // We use RDI, which also serves as argument register for slow call. 985 // RAX always holds the src object ptr, except after the slow call, 986 // then it holds the result. R8/RBX is used as temporary register. 987 988 Register tmp1 = rdi; 989 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx); 990 991 __ push(tmp1); 992 __ push(tmp2); 993 994 // Check for object being in the collection set. 995 __ mov(tmp1, rax); 996 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 997 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 998 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 999 __ testbool(tmp2); 1000 __ jccb(Assembler::notZero, resolve_oop); 1001 __ pop(tmp2); 1002 __ pop(tmp1); 1003 __ ret(0); 1004 1005 // Test if object is already resolved. 1006 __ bind(resolve_oop); 1007 __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes())); 1008 // Test if both lowest bits are set. We trick it by negating the bits 1009 // then test for both bits clear. 1010 __ notptr(tmp2); 1011 __ testb(tmp2, markWord::marked_value); 1012 __ jccb(Assembler::notZero, slow_path); 1013 // Clear both lower bits. It's still inverted, so set them, and then invert back. 1014 __ orptr(tmp2, markWord::marked_value); 1015 __ notptr(tmp2); 1016 // At this point, tmp2 contains the decoded forwarding pointer. 1017 __ mov(rax, tmp2); 1018 1019 __ pop(tmp2); 1020 __ pop(tmp1); 1021 __ ret(0); 1022 1023 __ bind(slow_path); 1024 1025 __ push(rcx); 1026 __ push(rdx); 1027 __ push(rdi); 1028 #ifdef _LP64 1029 __ push(r8); 1030 __ push(r9); 1031 __ push(r10); 1032 __ push(r11); 1033 __ push(r12); 1034 __ push(r13); 1035 __ push(r14); 1036 __ push(r15); 1037 #endif 1038 __ push(rbp); 1039 __ movptr(rbp, rsp); 1040 __ andptr(rsp, -StackAlignmentInBytes); 1041 __ push_FPU_state(); 1042 if (UseCompressedOops) { 1043 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi); 1044 } else { 1045 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi); 1046 } 1047 __ pop_FPU_state(); 1048 __ movptr(rsp, rbp); 1049 __ pop(rbp); 1050 #ifdef _LP64 1051 __ pop(r15); 1052 __ pop(r14); 1053 __ pop(r13); 1054 __ pop(r12); 1055 __ pop(r11); 1056 __ pop(r10); 1057 __ pop(r9); 1058 __ pop(r8); 1059 #endif 1060 __ pop(rdi); 1061 __ pop(rdx); 1062 __ pop(rcx); 1063 1064 __ pop(tmp2); 1065 __ pop(tmp1); 1066 __ ret(0); 1067 1068 return start; 1069 } 1070 1071 #undef __ 1072 1073 void ShenandoahBarrierSetAssembler::barrier_stubs_init() { 1074 if (ShenandoahLoadRefBarrier) { 1075 int stub_code_size = 4096; 1076 ResourceMark rm; 1077 BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); 1078 CodeBuffer buf(bb); 1079 StubCodeGenerator cgen(&buf); 1080 _shenandoah_lrb = generate_shenandoah_lrb(&cgen); 1081 } 1082 }