1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

// Entry point of the shared load-reference-barrier stub, generated once at
// startup by barrier_stubs_init() (see generate_shenandoah_lrb below).
address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

// Emits the pre-barrier for an oop arraycopy: before the copy runs, hand the
// source range to the Shenandoah runtime so it can be pre-processed (SATB
// enqueue and/or fixing forwarded references). Entirely skipped for
// non-reference element types, for count == 0, and when the current gc_state
// has none of the relevant flags set.
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (is_reference_type(type)) {

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
#ifdef _LP64
      Register thread = r15_thread;
#else
      // On x86_32 there is no dedicated thread register: pick a scratch
      // register that does not alias any of the three arguments, and
      // save/restore it around the barrier.
      Register thread = rax;
      if (thread == src || thread == dst || thread == count) {
        thread = rbx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rcx;
      }
      if (thread == src || thread == dst || thread == count) {
        thread = rdx;
      }
      __ push(thread);
      __ get_thread(thread);
#endif
      assert_different_registers(src, dst, count, thread);

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking. For an uninitialized destination
      // only HAS_FORWARDED matters (no SATB enqueue needed); otherwise the
      // barrier is also needed while MARKING.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags = ShenandoahHeap::HAS_FORWARDED;
      if (!dest_uninitialized) {
        flags |= ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, done);

      __ pusha();                      // push registers
#ifdef _LP64
      // The LP64 arraycopy stubs pass (src, dst, count) in the C calling
      // convention registers already; assert rather than shuffle.
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
        }
      } else
#endif
      {
        if (dest_uninitialized) {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
        } else {
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
        }
      }
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

// Thin dispatcher: emit the SATB pre-write barrier only when the SATB
// barrier is enabled at all.
void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

// SATB (snapshot-at-the-beginning) pre-write barrier. Records the previous
// value of a reference field into the thread-local SATB queue; falls back to
// a runtime call when the queue buffer is full. If obj != noreg the previous
// value is loaded from (obj+0) into pre_val first; otherwise pre_val must
// already hold it. tosca_live indicates rax holds a live value that must be
// preserved across the slow path.
void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Skip the barrier entirely unless a marking (or traversal) phase is active.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  // The queue is filled downwards: decrement the index, store it back, and
  // write the previous value at buffer+index.
  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values (mirror order of the pushes above)
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}

// Null-checking wrapper around resolve_forward_pointer_not_null: leaves a
// null dst untouched.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier, "should be enabled");
  Label is_null;
  __ testptr(dst, dst);
  __ jcc(Assembler::zero, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

// If the object's mark word encodes a forwarding pointer (lowest two bits
// set), replace dst with the decoded forwardee; otherwise leave dst alone.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clear the lowest two bits and copy the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  //  - Inverting the mark word
  //  - Test lowest two bits == 0
  //  - If so, set the lowest two bits
  //  - Invert the result back, and copy to dst

  Label done;
  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ notptr(tmp);
  // After inversion, "both bits set" in the original reads as "bit clear".
  __ testb(tmp, markWord::marked_value);
  __ jccb(Assembler::notZero, done);
  __ orptr(tmp, markWord::marked_value);
  __ notptr(tmp);
  __ mov(dst, tmp);
  __ bind(done);
}


// Load-reference barrier for a known non-null dst: when the heap has
// forwarded objects, call the shared LRB stub (which expects its argument
// and returns its result in rax).
void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  // Fast path: nothing to do unless there may be forwarded objects.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  // The stub's calling convention is rax-in/rax-out; use xchg so the old
  // rax value is preserved in dst across the call.
  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

// Load-reference barrier for oops loaded from native (off-heap) locations.
// Fast-paths null and non-EVACUATION phases; the slow path preserves all
// caller-visible registers and calls the native LRB runtime entry with the
// object in rdi, result returned in rax.
void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  Label done;
  Label not_null;
  Label slow_path;

  // null check
  __ testptr(dst, dst);
  __ jcc(Assembler::notZero, not_null);
  __ jmp(done);
  __ bind(not_null);


#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
#ifndef _LP64
  // Restore the scratch thread register before branching: both successors
  // of the test need it popped.
  __ pop(thread);
#endif
  __ jccb(Assembler::notZero, slow_path);
  __ jmp(done);
  __ bind(slow_path);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }
  // Save all registers the C call may clobber (rax itself carries the
  // argument/result and is not saved).
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rdi);

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);
}

// Store-value barrier dispatcher: only emitted when the enqueue barrier is
// enabled (used by some Shenandoah modes; gated on the flag).
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

// Store-value barrier: SATB-enqueues the value being stored (dst). Saves and
// restores all GP registers plus xmm0 around the pre-barrier, since this can
// be emitted in arbitrary interpreter contexts.
void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  // NOTE(review): this inner check is redundant with the assert/dispatcher
  // above, but kept as-is.
  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    // obj == noreg: pre_val (dst) is already loaded; tosca_live == true.
    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

// Null-tolerant load-reference barrier: null short-circuits, non-null goes
// through load_reference_barrier_not_null.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst);
    __ bind(done);
  }
}

// Barrier-aware oop load: performs the plain load, then applies the
// appropriate load-reference barrier (native variant for IN_NATIVE loads
// outside traversal mode), and finally the SATB keep-alive barrier for
// weak/phantom reference loads when required.
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
             Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool not_in_heap = (decorators & IN_NATIVE) != 0;
  bool on_reference = on_weak || on_phantom;
  bool is_traversal_mode = ShenandoahHeap::heap()->is_traversal_mode();
  // In traversal mode, keep-alive is forced even for AS_NO_KEEPALIVE loads.
  bool keep_alive = ((decorators & AS_NO_KEEPALIVE) == 0) || is_traversal_mode;

  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop) {
    if (not_in_heap && !is_traversal_mode) {
      load_reference_barrier_native(masm, dst);
    } else {
      load_reference_barrier(masm, dst);
    }

    if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) {
      const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
      assert_different_registers(dst, tmp1, tmp_thread);
      NOT_LP64(__ get_thread(thread));
      // Generate the SATB pre-barrier code to log the value of
      // the referent field in an SATB buffer.
      shenandoah_write_barrier_pre(masm /* masm */,
                                   noreg /* obj */,
                                   dst /* pre_val */,
                                   thread /* thread */,
                                   tmp1 /* tmp */,
                                   true /* tosca_live */,
                                   true /* expand_call */);
    }
  }
}

// Barrier-aware oop store: for in-heap reference stores, emits the SATB
// pre-barrier (logging the old value) and the store-value barrier on the new
// value, then performs the plain store. Everything else falls through to the
// base implementation.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
              Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = is_reference_type(type);
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    // get_thread can clobber rsi (the interpreter's bcp); preserve it.
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      // Storing null: no store-value barrier needed.
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Resolve a jobject from native code; jumps to slowpath when the object may
// need evacuation (gc_state is reached relative to jni_env because the
// JNIEnv pointer is what native code has in hand).
void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, done);

  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
  __ jccb(Assembler::notZero, slowpath);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
//
// oldval must be in rax (implicit cmpxchg operand). If exchange is false,
// res receives a 0/1 success flag. tmp1/tmp2/tmp3 are scratch.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                Register res, Address addr, Register oldval, Register newval,
                bool exchange, Register tmp1, Register tmp2, Register tmp3) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");

  Label retry, done;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Try to CAS with given arguments. If successful, then we are done,
  // and can safely return.
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve both
  // oldval and the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1);
  }
#endif
  resolve_forward_pointer(masm, tmp1, tmp3);

  // cmpxchg left the current memory value in rax (== oldval); resolve it too.
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2, tmp3);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, done, true);

  // Step 3. Try to CAS again with resolved to-space pointers.
  //
  // Corner case: it may happen that somebody stored the from-space pointer
  // to memory while we were preparing for retry. Therefore, we can fail again
  // on retry, and so need to do this in loop, always resolving the failure
  // witness.
  __ bind(retry);
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2, tmp3);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::equal, retry, true);

  // Step 4. If we need a boolean result out of CAS, check the flag again,
  // and promote the result. Note that we handle the flag from both the CAS
  // itself and from the retry loop.
  __ bind(done);
  if (!exchange) {
    assert(res != NULL, "need result register");
#ifdef _LP64
    __ setb(Assembler::equal, res);
    __ movzbl(res, res);
#else
    // Need something else to clean the result, because some registers
    // do not have byte encoding that movzbl wants. Cannot do the xor first,
    // because it modifies the flags.
    Label res_non_zero;
    __ movptr(res, 1);
    __ jcc(Assembler::equal, res_non_zero, true);
    __ xorptr(res, res);
    __ bind(res_non_zero);
#endif
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

// C1 slow-path stub for the SATB pre-barrier: optionally (re)loads the
// previous value, skips null, and calls the shared pre-barrier runtime blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

// C1 slow-path stub for the load-reference barrier: null- and cset-checks
// inline, then calls the LRB runtime blob with (obj, addr) parameters;
// result arrives in rax.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, *stub->continuation());

  // Check for object being in the collection set.
  // Index into the cset bitmap by region number (address >> region shift).
  __ mov(tmp1, res);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
#ifdef _LP64
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
#else
  // On x86_32, C1 register allocator can give us the register without 8-bit support.
  // Do the full-register access and test to avoid compilation failures.
  __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testptr(tmp2, 0xFF);
#endif
  __ jcc(Assembler::zero, *stub->continuation());

  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));

  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

// Runtime code blob behind gen_pre_barrier_stub: tries the SATB queue fast
// path, else saves live registers and calls write_ref_field_pre_entry.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

// Runtime code blob behind gen_load_reference_barrier_stub: calls the
// fixup entry (narrow-oop variant when compressed oops are enabled) with
// (obj, load address); result is left in rax.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

#ifdef _LP64
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (UseCompressedOops) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow), c_rarg0, c_rarg1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), c_rarg0, c_rarg1);
  }
#else
  __ load_parameter(0, rax);
  __ load_parameter(1, rbx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), rax, rbx);
#endif

  // Keep rax: it carries the barrier's result.
  __ restore_live_registers_except_rax(true);

  __ epilogue();
}

#undef __

#endif // COMPILER1

// Accessor for the shared LRB stub address; barrier_stubs_init() must have
// run first.
address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

// Generates the shared load-reference-barrier stub. Calling convention:
// object in rax on entry, (possibly forwarded) object in rax on return.
// Fast paths: not in collection set -> return as-is; already forwarded ->
// decode forwarding pointer from the mark word. Slow path: full register/FPU
// save and call into ShenandoahRuntime::load_reference_barrier.
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call,
  // then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  __ testbool(tmp2);
  __ jccb(Assembler::notZero, resolve_oop);
  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  // Test if object is already resolved.
  __ bind(resolve_oop);
  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // then test for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markWord::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markWord::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  // Full save of caller-clobbered GP registers, aligned stack, and FPU
  // state around the C call; rax carries argument and result.
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif
  __ push(rbp);
  __ movptr(rbp, rsp);
  __ andptr(rsp, -StackAlignmentInBytes);
  __ push_FPU_state();
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
  __ pop_FPU_state();
  __ movptr(rsp, rbp);
  __ pop(rbp);
#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

// One-time initialization: generate the shared LRB stub into a fresh buffer
// blob when the load-reference barrier is enabled.
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}