/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                        Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (type == T_OBJECT || type == T_ARRAY) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
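      // Note on the check below: gc_state is a per-thread copy of the heap's
      // collector state byte (see ShenandoahThreadLocalData), so a single testb
      // keeps the fast path free of runtime calls. HAS_FORWARDED matters for the
      // load-reference side; MARKING is only folded in when the destination may
      // still hold previous values that need SATB enqueueing.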
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags = ShenandoahHeap::HAS_FORWARDED;
      if (!dest_uninitialized) {
        flags |= ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha();                      // push registers
#ifdef _LP64
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
        }
      } else
#endif
      {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
        }
      }
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
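  // The SATB buffer is filled from high addresses towards low ones: index is the
  // byte offset of the next free slot, so index == 0 means the local buffer is
  // full and the value has to be handed to the runtime slow path instead.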
  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that *(ebp + frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack,
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy the thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  // Use rsi for src address
  const Register src_addr = rsi;
  // Set up the address parameter first, if it does not clobber the oop in dst
  bool need_addr_setup = (src_addr != dst);

  if (need_addr_setup) {
    __ push(src_addr);
    __ lea(src_addr, src);

    if (dst != rax) {
      // Move obj into rax and save rax
      __ push(rax);
      __ movptr(rax, dst);
    }
  } else {
    // dst == rsi
    __ push(rax);
    __ movptr(rax, dst);

    // we can clobber it, since it is an outgoing register
    __ lea(src_addr, src);
  }

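  // The shenandoah_lrb stub (generated by generate_shenandoah_lrb() below) expects
  // the object in rax and the load address in rsi, and returns the possibly
  // forwarded object in rax. The register shuffling above and below only exists to
  // honor that convention while preserving the caller's registers.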
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (need_addr_setup) {
    if (dst != rax) {
      __ movptr(dst, rax);
      __ pop(rax);
    }
    __ pop(src_addr);
  } else {
    __ movptr(dst, rax);
    __ pop(rax);
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    // __ pop_callee_saved_registers();
    __ popa();
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst, src);
    __ bind(done);
  }
}

void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierSetAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);

    load_reference_barrier(masm, dst, src);

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  }

  // 3: apply keep-alive barrier if needed
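  // The keep-alive barrier covers loads such as the intrinsified Reference.get():
  // during concurrent marking the loaded referent could otherwise escape to the
  // application without ever being marked, so it is logged through the SATB
  // pre-barrier to keep it alive. This is conservative but correct.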
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ push_IU_state();
    Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
    assert_different_registers(dst, tmp1, tmp_thread);
    if (!thread->is_valid()) {
      thread = rdx;
    }
    NOT_LP64(__ get_thread(thread));
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    shenandoah_write_barrier_pre(masm /* masm */,
                                 noreg /* obj */,
                                 dst /* pre_val */,
                                 thread /* thread */,
                                 tmp1 /* tmp */,
                                 true /* tosca_live */,
                                 true /* expand_call */);
    __ pop_IU_state();
  }
}

void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // Flatten the object address if needed.
    // We do it regardless of whether the store is precise, because we need the registers anyway.
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
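// A "false negative" means the caller compares against a to-space copy of an object
// while memory still holds the from-space copy (or vice versa): the two pointers
// differ bit-wise, so a plain CAS fails even though both refer to the same object.
// The code below detects that case, heals the memory location with the to-space
// pointer, and retries the CAS once.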
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS has failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching the resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when the offending in-memory value is NULL, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when the heap is stable, the failure is definitely legitimate
#ifdef _LP64
  const Register thread = r15_thread;
#else
  const Register thread = tmp2;
  __ get_thread(thread);
#endif
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }

  // Decode the offending in-memory value.
  // Test if-forwarded
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask the forwarding pointer
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }
#endif

  // Now we have the forwarded offender in tmp2.
  // Compare, and if they don't match, we have a legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have the from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with a to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.
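  //
  // (Register state here: rax/oldval holds the stale from-space value that the
  // failed CAS loaded from memory, and tmp2 holds its to-space forwardee. The
  // healing CAS below uses that stale value as the expected value, so it can only
  // succeed while memory still holds the from-space pointer.)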

#ifdef _LP64
  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }
#endif

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(tmp2, addr);
  } else
#endif
  {
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and the memory pointer is to-space as well. Nothing is able to store
  // a from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else
#endif
  {
    __ movptr(oldval, tmp2);
  }

  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately,
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, the failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != NULL, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());
}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

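  // Filter the common cases inline: a NULL object and an object outside the
  // collection set both fall through to the continuation; only objects that may
  // actually need copying reach the runtime call on the slow path.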
  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, the C1 register allocator can give us a register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store the original value in the thread's buffer?
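  // Same SATB queue protocol as in satb_write_barrier_pre() above: the index counts
  // down in bytes, and a zero index means the buffer is full, so the runtime has to
  // take over on the slow path.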
  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
#endif

  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

/*
 * Incoming parameters:
 * rax: oop
 * rsi: load address
 */
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as the argument register for the slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // when it holds the result. R8/RBX is used as a temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if the object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // and then testing for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markOopDesc::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
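  // Return the forwardee in rax, matching the stub convention of passing the result
  // back in the same register that carried the original object.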
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
  }
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}