/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (type == T_OBJECT || type == T_ARRAY) {
#ifdef _LP64
    if (!checkcast) {
      if (!obj_int) {
        // Save count for barrier
        __ movptr(r11, count);
      } else if (disjoint) {
        // Save dst in r11 in the disjoint case
        __ movq(r11, dst);
      }
    }
#else
    if (disjoint) {
      __ mov(rdx, dst); // save 'to'
    }
#endif

    if (ShenandoahSATBBarrier && !dest_uninitialized) {
      Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
      assert_different_registers(dst, count, thread); // we don't care about src here?
#ifndef _LP64
      __ push(thread);
      __ get_thread(thread);
#endif

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      __ testb(gc_state, ShenandoahHeap::MARKING);
      __ jcc(Assembler::zero, done);

      __ pusha(); // push registers
#ifdef _LP64
      if (count == c_rarg0) {
        if (dst == c_rarg1) {
          // exactly backwards!!
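          // count is in c_rarg0 and dst is in c_rarg1; a single exchange puts each in place.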
          __ xchgptr(c_rarg1, c_rarg0);
        } else {
          __ movptr(c_rarg1, count);
          __ movptr(c_rarg0, dst);
        }
      } else {
        __ movptr(c_rarg0, dst);
        __ movptr(c_rarg1, count);
      }
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), 2);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2);
      }
#else
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry),
                      dst, count);
#endif
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {
  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
  Register tmp = rax;

  if (type == T_OBJECT || type == T_ARRAY) {
#ifdef _LP64
    if (!checkcast) {
      if (!obj_int) {
        // Save count for barrier
        count = r11;
      } else if (disjoint && obj_int) {
        // Use the saved dst in the disjoint case
        dst = r11;
      }
    } else {
      tmp = rscratch1;
    }
#else
    if (disjoint) {
      __ mov(dst, rdx); // restore 'to'
    }
#endif

    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
    assert_different_registers(dst, thread); // do we care about src at all here?

#ifndef _LP64
    __ push(thread);
    __ get_thread(thread);
#endif

    // Short-circuit if count == 0.
    Label done;
    __ testptr(count, count);
    __ jcc(Assembler::zero, done);

    // Skip runtime call if no forwarded objects.
    Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ testb(gc_state, ShenandoahHeap::UPDATEREFS);
    __ jcc(Assembler::zero, done);

    __ pusha(); // push registers (overkill)
#ifdef _LP64
    if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
      assert_different_registers(c_rarg1, dst);
      __ mov(c_rarg1, count);
      __ mov(c_rarg0, dst);
    } else {
      assert_different_registers(c_rarg0, count);
      __ mov(c_rarg0, dst);
      __ mov(c_rarg1, count);
    }
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 2);
#else
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry),
                    dst, count);
#endif
    __ popa();

    __ bind(done);
    NOT_LP64(__ pop(thread);)
  }
}

void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);             // tmp := *index_adr
  __ cmpptr(tmp, 0);                 // tmp == 0?
  __ jcc(Assembler::equal, runtime); // If yes, goto runtime

  __ subptr(tmp, wordSize);          // tmp := tmp - wordSize
  __ movptr(index, tmp);             // *index_adr := tmp
  __ addptr(tmp, buffer);            // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier, "should be enabled");
  Label is_null;
  __ testptr(dst, dst);
  __ jcc(Assembler::zero, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clears the lowest two bits and copies the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Testing the lowest two bits == 0
  // - If so, setting the lowest two bits
  // - Inverting the result back, and copying to dst

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx);
    __ push(tmp);
  }

  Label done;
  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ notptr(tmp);
  __ testb(tmp, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, done);
  __ orptr(tmp, markOopDesc::marked_value);
  __ notptr(tmp);
  __ mov(dst, tmp);
  __ bind(done);

  if (borrow_reg) {
    __ pop(tmp);
  }
}


void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst) {
  if (!ShenandoahLoadRefBarrier) {
    return;
  }

  Label done;
  Label not_null;
  Label slow_path;

  // null check
  __ testptr(dst, dst);
  __ jcc(Assembler::notZero, not_null);
  __ jmp(done);
  __ bind(not_null);


#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::EVACUATION);
#ifndef _LP64
  __ pop(thread);
#endif
  __ jccb(Assembler::notZero, slow_path);
  __ jmp(done);
  __ bind(slow_path);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rdi);

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
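    // pusha() below covers only the general-purpose registers; xmm0 may hold a live
    // floating-point value in the interpreter, so it is spilled and reloaded separately.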
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    //__ pop_callee_saved_registers();
    __ popa();
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst);
    __ bind(done);
  }
}

void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool not_in_heap = (decorators & IN_NATIVE) != 0;
  bool on_reference = on_weak || on_phantom;
  bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0;

  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop) {
    if (not_in_heap) {
      if (ShenandoahHeap::heap()->is_traversal_mode()) {
        load_reference_barrier(masm, dst);
        keep_alive = true;
      } else {
        load_reference_barrier_native(masm, dst);
      }
    } else {
      load_reference_barrier(masm, dst);
    }

    if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) {
      const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
      assert_different_registers(dst, tmp1, tmp_thread);
      NOT_LP64(__ get_thread(thread));
      // Generate the SATB pre-barrier code to log the value of
      // the referent field in an SATB buffer.
      shenandoah_write_barrier_pre(masm /* masm */,
                                   noreg /* obj */,
                                   dst /* pre_val */,
                                   thread /* thread */,
                                   tmp1 /* tmp */,
                                   true /* tosca_live */,
                                   true /* expand_call */);
    }
  }
}

void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");

  Label retry, done;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Try to CAS with given arguments. If successful, then we are done,
  // and can safely return.
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve both
  // oldval and the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
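  // If they match, the failure was only due to a stale from-space pointer,
  // and a retry with the resolved (to-space) expected value may succeed.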
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1);
  }
#endif
  resolve_forward_pointer(masm, tmp1);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, done, true);

  // Step 3. Try to CAS again with resolved to-space pointers.
  //
  // Corner case: it may happen that somebody stored the from-space pointer
  // to memory while we were preparing for retry. Therefore, we can fail again
  // on retry, and so need to do this in a loop, always resolving the failure
  // witness.
  __ bind(retry);
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::equal, retry, true);

  // Step 4. If we need a boolean result out of CAS, check the flag again,
  // and promote the result. Note that we handle the flag from both the CAS
  // itself and from the retry loop.
  __ bind(done);
  if (!exchange) {
    assert(res != NULL, "need result register");
#ifdef _LP64
    __ setb(Assembler::equal, res);
    __ movzbl(res, res);
#else
    // Need something else to clean the result, because some registers
    // do not have byte encoding that movzbl wants. Cannot do the xor first,
    // because it modifies the flags.
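    // Instead, materialize 1 unconditionally (mov does not touch the flags) and
    // clear it only on the fall-through failure path.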
    Label res_non_zero;
    __ movptr(res, 1);
    __ jcc(Assembler::equal, res_non_zero, true);
    __ xorptr(res, res);
    __ bind(res_non_zero);
#endif
  }
}

void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    __ subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      __ push(rbx);
      __ movl(rbx, 0xffff);
      __ kmovwl(k1, rbx);
      __ pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if (UseAVX > 2) {
        // Save upper half of ZMM registers
        __ subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      __ subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    __ subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
}

void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }
  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    __ addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    __ addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      __ addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        __ addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  __ testptr(res, res);
  __ jcc(Assembler::zero, done);

  load_reference_barrier_not_null(ce->masm(), res);

  __ bind(done);
  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?
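  // The SATB queue index counts down in bytes; index == 0 means the buffer
  // is full, so the enqueue is handed to the runtime instead.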

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

#undef __

#endif // COMPILER1

address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call and
  // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  // TODO: Can we use only 1 register here?
  // The source object arrives here in rax.
  // live: rax
  // live: tmp1
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  // live: tmp2
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  // unlive: tmp1
  __ testbool(tmp2);
  // unlive: tmp2
  __ jccb(Assembler::notZero, resolve_oop);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(resolve_oop);

  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // and then testing for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markOopDesc::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  save_vector_registers(cgen->assembler());
  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi);
  restore_vector_registers(cgen->assembler());

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}