1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

// Entry point of the shared load-reference-barrier stub; generated lazily by
// barrier_stubs_init() and fetched via shenandoah_lrb().
address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

// Emits the pre-barrier work for an oop-array copy: when SATB marking or the
// load-reference barrier is enabled, calls into the matching
// ShenandoahRuntime::write_ref_array_pre_* entry so the collector can process
// the source range before the copy runs. No-op for non-reference element types.
// The runtime call is skipped entirely when count == 0 or when the gc_state
// byte shows neither HAS_FORWARDED nor (for initialized destinations) MARKING.
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (is_reference_type(type)) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      // On x86_32 there is no dedicated thread register: pick a scratch
      // register distinct from src/dst/count, preserve it, and load the
      // current thread into it.
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags = ShenandoahHeap::HAS_FORWARDED;
      if (!dest_uninitialized) {
        flags |= ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha();                      // push registers
#ifdef _LP64
      // On x86_64 the runtime entries take src/dst/count in the first three
      // C argument registers; assert the arraycopy stub handed them to us
      // already in place.
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
        }
      } else
#endif
      {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
        }
      }
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

// Thin wrapper: emit the SATB pre-write barrier only when the SATB barrier
// is enabled for this Shenandoah configuration.
void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

// Emits the SATB (snapshot-at-the-beginning) pre-write barrier: records the
// previous value of the field into the thread-local SATB buffer, falling back
// to ShenandoahRuntime::write_ref_field_pre_entry when the buffer is full.
//
// obj       - holds the field address when the previous value must be loaded
//             here; noreg when pre_val already holds it.
// pre_val   - the previous value (or the register it is loaded into).
// thread    - current JavaThread (must be r15_thread on x86_64).
// tmp       - scratch register.
// tosca_live- rax holds a live value and must be preserved across the slow path.
// expand_call - see the comment below on bypassing call_VM_leaf's frame check.
void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Only take the barrier when marking (or traversal GC) is in progress.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // save the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

// Null-checking variant: resolves the forwarding pointer in dst only when
// dst is non-null.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier, "should be enabled");
  Label is_null;
  __ testptr(dst, dst);
  __ jcc(Assembler::zero, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clear the lowest two bits and copy the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Test lowest two bits == 0
  // - If so, set the lowest two bits
  // - Invert the result back, and copy to dst

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx);
    __ push(tmp);
  }

  Label done;
  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ notptr(tmp);
  __ testb(tmp, markWord::marked_value);
  __ jccb(Assembler::notZero, done);
  __ orptr(tmp, markWord::marked_value);
  __ notptr(tmp);
  __ mov(dst, tmp);
  __ bind(done);

  if (borrow_reg) {
    __ pop(tmp);
  }
}


// Emits the load-reference barrier for a known-non-null oop in dst: when the
// heap has forwarded objects (gc_state HAS_FORWARDED), calls the shared
// shenandoah_lrb() stub, which expects its argument and returns its result
// in rax — hence the xchg dance when dst != rax.
void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  // x86_32: pick a thread register that does not clash with dst.
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

// Load-reference barrier for oops loaded from native (off-heap) locations.
// Handles null, then only takes the slow path while evacuation is in
// progress; the slow path saves all caller-saved GP registers and calls
// ShenandoahRuntime::load_reference_barrier_native with the oop in rdi,
// receiving the result in rax.
void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  Label done;
  Label not_null;
  Label slow_path;

  // null check
  __ testptr(dst, dst);
  __ jcc(Assembler::notZero, not_null);
  __ jmp(done);
  __ bind(not_null);


#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
#ifndef _LP64
  __ pop(thread);
#endif
  __ jccb(Assembler::notZero, slow_path);
  __ jmp(done);
  __ bind(slow_path);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }
  // Save all caller-saved registers that the C runtime call may clobber.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rdi);

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);
}

// Thin wrapper: emit the store-value enqueue barrier only when enabled.
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

// Enqueues the value being stored (dst) into the SATB queue via
// satb_write_barrier_pre(), saving/restoring all GP registers plus xmm0
// around it (those are the commonly live registers in the interpreter).
void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  // NOTE(review): this inner flag check duplicates both the assert above and
  // the guard in storeval_barrier(); it is always true when we get here.
  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

// Null-checking variant of the load-reference barrier.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst);
    __ bind(done);
  }
}

// BarrierSetAssembler hook for oop loads: performs the plain load, then the
// appropriate load-reference barrier (native variant for IN_NATIVE loads
// outside traversal mode), and finally the SATB keep-alive barrier for
// weak/phantom reference loads when required.
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool not_in_heap = (decorators & IN_NATIVE) != 0;
  bool on_reference = on_weak || on_phantom;
  bool is_traversal_mode = ShenandoahHeap::heap()->is_traversal_mode();
  // Traversal mode keeps referents alive even for AS_NO_KEEPALIVE loads.
  bool keep_alive = ((decorators & AS_NO_KEEPALIVE) == 0) || is_traversal_mode;

  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop) {
    if (not_in_heap && !is_traversal_mode) {
      load_reference_barrier_native(masm, dst);
    } else {
      load_reference_barrier(masm, dst);
    }

    if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) {
      const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
      assert_different_registers(dst, tmp1, tmp_thread);
      NOT_LP64(__ get_thread(thread));
      // Generate the SATB pre-barrier code to log the value of
      // the referent field in an SATB buffer.
      shenandoah_write_barrier_pre(masm /* masm */,
                                   noreg /* obj */,
                                   dst /* pre_val */,
                                   thread /* thread */,
                                   tmp1 /* tmp */,
                                   true /* tosca_live */,
                                   true /* expand_call */);
    }
  }
}

// BarrierSetAssembler hook for oop stores into the heap: emits the SATB
// pre-barrier (logging the old field value), the storeval enqueue barrier on
// the new value, and then the plain store. Non-heap/non-oop stores fall
// through to the base implementation.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    // On x86_32, get_thread() clobbers registers the interpreter expects
    // preserved; save/restore the bytecode pointer around this sequence.
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Fast-path JNI handle resolution: resolve via the base implementation, then
// branch to slowpath while evacuation is in progress (the oop may point to
// from-space). gc_state is addressed relative to jni_env since the thread
// register is not available here.
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
// Emits a CAS on an oop field that tolerates false negatives caused by
// concurrent evacuation: a failure is retried after resolving both the
// expected value and the witness through their forwarding pointers.
//
// res      - boolean result register (only when exchange == false).
// addr     - memory location being CASed.
// oldval   - expected value; must be rax for the implicit cmpxchg operand.
// newval   - replacement value.
// exchange - true: leave the witness in rax; false: set res to 0/1.
// tmp1/2   - scratch registers.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");

  Label retry, done;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Try to CAS with given arguments. If successful, then we are done,
  // and can safely return.
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve both
  // oldval and the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1);
  }
#endif
  resolve_forward_pointer(masm, tmp1);

  // cmpxchg left the current memory value (the witness) in rax/oldval;
  // resolve it too before comparing.
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, done, true);

  // Step 3. Try to CAS again with resolved to-space pointers.
  //
  // Corner case: it may happen that somebody stored the from-space pointer
  // to memory while we were preparing for retry. Therefore, we can fail again
  // on retry, and so need to do this in loop, always resolving the failure
  // witness.
  __ bind(retry);
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::equal, retry, true);

  // Step 4. If we need a boolean result out of CAS, check the flag again,
  // and promote the result. Note that we handle the flag from both the CAS
  // itself and from the retry loop.
  __ bind(done);
  if (!exchange) {
    assert(res != NULL, "need result register");
#ifdef _LP64
    __ setb(Assembler::equal, res);
    __ movzbl(res, res);
#else
    // Need something else to clean the result, because some registers
    // do not have byte encoding that movzbl wants. Cannot do the xor first,
    // because it modifies the flags.
    Label res_non_zero;
    __ movptr(res, 1);
    __ jcc(Assembler::equal, res_non_zero, true);
    __ xorptr(res, res);
    __ bind(res_non_zero);
#endif
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// C1 slow-path stub for the SATB pre-barrier: optionally loads the previous
// value, skips null, and otherwise calls the shared pre-barrier runtime blob
// with pre_val as parameter 0.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// C1 slow-path stub for the load-reference barrier: filters out null and
// non-collection-set objects inline, then calls the shared runtime blob with
// the object and its address as parameters; the result arrives in rax.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  // Index into the in-cset fast-test table by region number.
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Runtime code blob backing gen_pre_barrier_stub(): fast path enqueues the
// previous value into the thread-local SATB buffer; slow path calls
// ShenandoahRuntime::write_ref_field_pre_entry.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Runtime code blob backing gen_load_reference_barrier_stub(): calls the
// fixup entry (narrow-oop variant when compressed oops are on) with the
// object and its field address; rax carries the result out.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), rax, rbx);
#endif

  // rax holds the barrier result; restore everything else.
  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Accessor for the shared load-reference-barrier stub entry point.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

// Generates the shared load-reference-barrier stub. Calling convention:
// the object comes in and the result goes out in rax; all other registers
// are preserved. Fast paths: object not in collection set, or already
// forwarded (mark word decode); otherwise call the runtime.
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  // Save all caller-saved GP registers, realign the stack, and preserve
  // FPU state around the C runtime call.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// Generates the shared barrier stubs into a fresh buffer blob at VM startup;
// only needed when the load-reference barrier is enabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}