/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

// Entry point of the shared load-reference-barrier stub; generated lazily
// by barrier_stubs_init() at the bottom of this file.
address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

// Arraycopy pre-barrier: for oop/object-array copies, calls into the
// Shenandoah runtime to pre-process the source range before the copy.
// Skipped when count == 0 or when the GC state does not require it
// (neither HAS_FORWARDED nor, for initialized destinations, MARKING).
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (type == T_OBJECT || type == T_ARRAY) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      // On x86_32 there is no dedicated thread register: pick a scratch
      // register that does not alias any of the arguments.
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags = ShenandoahHeap::HAS_FORWARDED;
      if (!dest_uninitialized) {
        flags |= ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha();                      // push registers
#ifdef _LP64
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
        }
      } else {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
        }
      }
#else
      if (dest_uninitialized) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry),
                        src, dst, count);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry),
                        src, dst, count);
      }
#endif
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

// SATB write-barrier entry point: delegates to satb_write_barrier_pre
// when the SATB barrier is enabled, otherwise emits nothing.
void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

// Emits the SATB pre-barrier: while marking (or traversal) is active,
// records the previous value of a reference field into the thread-local
// SATB queue, falling back to a runtime call when the queue is full.
// If obj != noreg the previous value is loaded from (obj+0) into pre_val;
// otherwise pre_val is expected to hold it already.
void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}

// Null-checking wrapper: resolves the forwarding pointer in dst unless
// dst is null.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier, "should be enabled");
  Label is_null;
  __ testptr(dst, dst);
  __ jcc(Assembler::zero, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clear the lowest two bits and copy the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Test lowest two bits == 0
  // - If so, set the lowest two bits
  // - Invert the result back, and copy to dst

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx);
    __ push(tmp);
  }

  Label done;
  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ notptr(tmp);
  __ testb(tmp, markWord::marked_value);
  __ jccb(Assembler::notZero, done);
  __ orptr(tmp, markWord::marked_value);
  __ notptr(tmp);
  __ mov(dst, tmp);
  __ bind(done);

  if (borrow_reg) {
    __ pop(tmp);
  }
}


// Load-reference barrier for a known non-null oop in dst: when the heap
// has forwarded objects, calls the shared shenandoah_lrb stub with the
// object in rax and takes the (possibly updated) result back from rax.
void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

// Load-reference barrier for oops loaded from native (off-heap) roots:
// during evacuation, calls the native LRB runtime with all caller-saved
// registers preserved around the call.
void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  Label done;
  Label not_null;
  Label slow_path;

  // null check
  __ testptr(dst, dst);
  __ jcc(Assembler::notZero, not_null);
  __ jmp(done);
  __ bind(not_null);


#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
#ifndef _LP64
  __ pop(thread);
#endif
  __ jccb(Assembler::notZero, slow_path);
  __ jmp(done);
  __ bind(slow_path);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }
  // Save all registers the runtime call might clobber; the result comes
  // back in rax, which is deliberately not saved.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rdi);

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);
}

// Store-value barrier entry point: delegates to storeval_barrier_impl
// when the store-value enqueue barrier is enabled.
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

// Enqueues the value being stored (dst) via the SATB pre-barrier, saving
// and restoring the interpreter's commonly used registers plus xmm0.
void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

// Null-checking wrapper around load_reference_barrier_not_null.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst);
    __ bind(done);
  }
}

// Loads a value and applies the Shenandoah barriers appropriate for the
// decorators: load-reference barrier (heap or native flavor) for oops,
// plus the SATB keep-alive barrier for weak/phantom reference loads.
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool not_in_heap = (decorators & IN_NATIVE) != 0;
  bool on_reference = on_weak || on_phantom;
  bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0;

  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop) {
    if (not_in_heap) {
      if (ShenandoahHeap::heap()->is_traversal_mode()) {
        load_reference_barrier(masm, dst);
        keep_alive = true;
      } else {
        load_reference_barrier_native(masm, dst);
      }
    } else {
      load_reference_barrier(masm, dst);
    }

    if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) {
      const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
      assert_different_registers(dst, tmp1, tmp_thread);
      NOT_LP64(__ get_thread(thread));
      // Generate the SATB pre-barrier code to log the value of
      // the referent field in an SATB buffer.
      shenandoah_write_barrier_pre(masm /* masm */,
                                   noreg /* obj */,
                                   dst /* pre_val */,
                                   thread /* thread */,
                                   tmp1 /* tmp */,
                                   true /* tosca_live */,
                                   true /* expand_call */);
    }
  }
}

// Stores a value with the Shenandoah barriers for in-heap oop stores:
// SATB pre-barrier on the previous field value, store-value barrier on
// the new value, then the plain store.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Resolves a jobject in native code; branches to slowpath when the oop
// is non-null and evacuation is in progress. Note: gc_state is reached
// relative to jni_env, since the thread register is not set up here.
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");

  Label retry, done;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Try to CAS with given arguments. If successful, then we are done,
  // and can safely return.
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve both
  // oldval and the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1);
  }
#endif
  resolve_forward_pointer(masm, tmp1);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, done, true);

  // Step 3. Try to CAS again with resolved to-space pointers.
  //
  // Corner case: it may happen that somebody stored the from-space pointer
  // to memory while we were preparing for retry. Therefore, we can fail again
  // on retry, and so need to do this in loop, always resolving the failure
  // witness.
  __ bind(retry);
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::equal, retry, true);

  // Step 4. If we need a boolean result out of CAS, check the flag again,
  // and promote the result. Note that we handle the flag from both the CAS
  // itself and from the retry loop.
  __ bind(done);
  if (!exchange) {
    assert(res != NULL, "need result register");
#ifdef _LP64
    __ setb(Assembler::equal, res);
    __ movzbl(res, res);
#else
    // Need something else to clean the result, because some registers
    // do not have byte encoding that movzbl wants. Cannot do the xor first,
    // because it modifies the flags.
    Label res_non_zero;
    __ movptr(res, 1);
    __ jcc(Assembler::equal, res_non_zero, true);
    __ xorptr(res, res);
    __ bind(res_non_zero);
#endif
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// C1 slow-path stub for the SATB pre-barrier: optionally loads the
// previous value, then calls the pre-barrier runtime blob unless the
// previous value is null.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// C1 slow-path stub for the load-reference barrier: checks cset
// membership and the mark-word forwarding bits inline, and calls the
// runtime blob only when the object still needs evacuation.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  // Test if object is resolved.
  __ movptr(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp1);
#ifdef _LP64
  __ testb(tmp1, markWord::marked_value);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ testptr(tmp1, markWord::marked_value);
#endif
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp1, markWord::marked_value);
  __ notptr(tmp1);
  // At this point, tmp1 contains the decoded forwarding pointer.
  __ mov(res, tmp1);

  __ jmp(*stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Runtime blob behind the C1 pre-barrier stub: enqueues the previous
// value into the SATB queue, calling the VM when the queue is full.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Runtime blob behind the C1 load-reference-barrier stub: forwards the
// object through the LRB runtime; result is returned in rax.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);
  __ load_parameter(0, LP64_ONLY(c_rarg0) NOT_LP64(rax));
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), LP64_ONLY(c_rarg0) NOT_LP64(rax));
  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Accessor for the shared LRB stub; must only be called after
// barrier_stubs_init() has generated it.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

// Generates the shared load-reference-barrier stub. Calling convention:
// object in rax on entry, (possibly forwarded) object in rax on return.
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  // Save all registers the runtime call might clobber; the result comes
  // back in rax, which is deliberately not saved.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  // Align the stack for the C call and preserve FPU state around it.
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// Generates the shared LRB stub at startup when the load-reference
// barrier is enabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}