1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

// Entry point of the generated load-reference-barrier (LRB) stub.
// Initialized by barrier_stubs_init() when ShenandoahLoadRefBarrier is on.
address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

// Emit the GC prologue for an arraycopy. For reference arrays (T_OBJECT/T_ARRAY),
// when any relevant barrier flag is enabled, this emits a runtime call that
// pre-processes the copied range [src, src+count). The call is skipped when
// count == 0 or when the thread-local gc_state says no barrier work is needed.
// src/dst/count are live across this code; all general registers are preserved
// around the runtime call via pusha/popa.
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (type == T_OBJECT || type == T_ARRAY) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahStoreValEnqueueBarrier || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      // On x86_32 there is no dedicated thread register: pick a scratch
      // register that does not collide with src/dst/count, save it, and
      // load the current thread into it.
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not active: test the thread-local gc_state
      // byte against the flags that would require barrier work for this copy.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags;
      if (ShenandoahSATBBarrier && dest_uninitialized) {
        flags = ShenandoahHeap::HAS_FORWARDED;
      } else {
        flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha();                      // push registers

#ifdef _LP64
      // On x86_64 the stub calling convention already has the arguments in
      // the C argument registers (rdi, rsi, rdx).
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
                        src, dst, count);
      } else
#endif
      {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
                        src, dst, count);
      }

      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

// SATB pre-write barrier dispatcher: emits the barrier only when
// ShenandoahSATBBarrier is enabled. See satb_write_barrier_pre for details.
void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

// Emit the SATB (snapshot-at-the-beginning) pre-write barrier:
//   - skip everything unless gc_state has MARKING set;
//   - if obj != noreg, load the previous value from (obj, 0) into pre_val;
//   - skip NULL previous values;
//   - try to enqueue pre_val into the thread-local SATB buffer
//     (index counts down by wordSize; index == 0 means the buffer is full);
//   - otherwise call ShenandoahRuntime::write_ref_field_pre_entry, saving
//     and restoring the live input registers around the call.
// tosca_live: rax holds a live value and must be preserved.
// expand_call: see the comment inside about bypassing call_VM_leaf_base checks.
void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Barrier is only needed while concurrent marking is in progress.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}

// Emit the load-reference barrier for a known non-NULL oop in dst, loaded
// from src. When the heap has forwarded objects (gc_state HAS_FORWARDED),
// calls the shared _shenandoah_lrb stub. The stub's calling convention is:
// oop in rax, load address in rsi, result back in rax — so this code shuffles
// dst/rax/rsi accordingly, saving whatever it clobbers.
void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  // Use rsi for src address
  const Register src_addr = rsi;
  // Setup address parameter first, if it does not clobber oop in dst
  bool need_addr_setup = (src_addr != dst);

  if (need_addr_setup) {
    __ push(src_addr);
    __ lea(src_addr, src);

    if (dst != rax) {
      // Move obj into rax and save rax
      __ push(rax);
      __ movptr(rax, dst);
    }
  } else {
    // dst == rsi
    __ push(rax);
    __ movptr(rax, dst);

    // we can clobber it, since it is outgoing register
    __ lea(src_addr, src);
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (need_addr_setup) {
    if (dst != rax) {
      __ movptr(dst, rax);
      __ pop(rax);
    }
    __ pop(src_addr);
  } else {
    __ movptr(dst, rax);
    __ pop(rax);
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

// Store-value enqueue barrier dispatcher: emits the barrier only when
// ShenandoahStoreValEnqueueBarrier is enabled.
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

// Enqueue the to-be-stored value (dst) into the SATB queue, by reusing
// satb_write_barrier_pre with obj == noreg and pre_val == dst. All general
// registers are saved via pusha, and xmm0 is spilled manually because the
// runtime call inside the pre-barrier may clobber it.
void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    // Pick a thread scratch register that avoids dst and tmp.
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

// NULL-checking wrapper around load_reference_barrier_not_null: NULL oops
// never need the barrier.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst, src);
    __ bind(done);
  }
}

//
// Arguments:
//
// Inputs:
//   src:        oop location, might be clobbered
//   tmp1:       scratch register, might not be valid.
//
// Output:
//   dst:        oop loaded from src location
//
// Kill:
//   tmp1 (if it is valid)
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
             Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB: if dst aliases the address base/index,
    // load into a different register first so the address stays intact.
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    load_reference_barrier(masm, dst, src);

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed: record the loaded referent in the
  // SATB buffer so concurrent marking keeps it alive.
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ push_IU_state();
    // That path can be reached from the c2i adapter with live fp
    // arguments in registers. Spill all java-calling-convention float
    // argument registers around the barrier.
    LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
    __ subptr(rsp, 64);
    __ movdbl(Address(rsp, 0), xmm0);
    __ movdbl(Address(rsp, 8), xmm1);
    __ movdbl(Address(rsp, 16), xmm2);
    __ movdbl(Address(rsp, 24), xmm3);
    __ movdbl(Address(rsp, 32), xmm4);
    __ movdbl(Address(rsp, 40), xmm5);
    __ movdbl(Address(rsp, 48), xmm6);
    __ movdbl(Address(rsp, 56), xmm7);

    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);
    __ movdbl(xmm0, Address(rsp, 0));
    __ movdbl(xmm1, Address(rsp, 8));
    __ movdbl(xmm2, Address(rsp, 16));
    __ movdbl(xmm3, Address(rsp, 24));
    __ movdbl(xmm4, Address(rsp, 32));
    __ movdbl(xmm5, Address(rsp, 40));
    __ movdbl(xmm6, Address(rsp, 48));
    __ movdbl(xmm7, Address(rsp, 56));
    __ addptr(rsp, 64);
    __ pop_IU_state();
  }
}

// Emit a reference store with Shenandoah barriers: the SATB pre-barrier on
// the old field value (when AS_NORMAL), the store-value enqueue barrier on
// the new value, and then the plain store. Non-heap or non-oop stores go
// straight to the base class.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
              Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3  /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      // Storing NULL: no store-value barrier needed.
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Resolve a jobject in native code. After the base-class resolution, a
// non-NULL oop must be routed to the slowpath while evacuation is running,
// since the native wrapper cannot apply the load-reference barrier itself.
// gc_state is addressed relative to jni_env, which points into the thread.
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
// CAS an oop at addr from oldval to newval, retrying across the false
// negatives that arise when a from-space pointer in memory is compared
// against the equivalent to-space pointer (or vice versa). See the numbered
// steps inline. Contract:
//   oldval must be rax (implicit cmpxchg operand); tmp1/tmp2 are scratch.
//   exchange == true: CAE semantics — failure witness left in oldval.
//   exchange == false: boolean result (0/1) placed in res.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching to resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
  // (cmpxchg left the witnessed memory value in oldval/rax).
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded: a forwarded object has both low mark-word bits set.
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask forwarding pointer (clear the two low tag bits).
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// C1 slow-path stub for the SATB pre-barrier: optionally (re)load the
// previous value, skip NULL, then call into the C1 pre-barrier runtime blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// C1 slow-path stub for the load-reference barrier: skip NULL oops and oops
// outside the collection set, otherwise call the C1 LRB runtime blob with
// (oop, load address) as parameters. Result must arrive in rax.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set: index the cset fast-test
  // table by region number (oop >> region-size shift).
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Generate the C1 pre-barrier runtime blob: same SATB enqueue logic as
// satb_write_barrier_pre, but as a standalone stub taking the previous value
// as stub parameter 0.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Generate the C1 load-reference-barrier runtime blob: forwards stub
// parameters (oop, load address) to the Shenandoah LRB runtime entry,
// leaving the result in rax.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Accessor for the shared LRB stub entry; barrier_stubs_init() must have run.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

/*
 * Incoming parameters:
 * rax: oop
 * rsi: load address
 *
 * Returns the (possibly forwarded/evacuated) oop in rax. Fast paths handle
 * "not in collection set" and "already forwarded" without a runtime call.
 */
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  // Not in cset: return the oop unchanged.
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markOopDesc::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Slow path: save all caller-saved state, align the stack, and call the
  // Shenandoah runtime to evacuate/resolve the object.
  __ bind(slow_path);

  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
  }
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// Generate the shared barrier stubs at VM startup. Only needed when the
// load-reference barrier is enabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}