1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. 7 * 8 * This code is distributed in the hope that it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 * version 2 for more details (a copy is included in the LICENSE file that 12 * accompanied this code). 13 * 14 * You should have received a copy of the GNU General Public License version 15 * 2 along with this work; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 17 * 18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 19 * or visit www.oracle.com if you need additional information or have any 20 * questions. 21 * 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" 26 #include "gc/shenandoah/shenandoahForwarding.hpp" 27 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 28 #include "gc/shenandoah/shenandoahHeapRegion.hpp" 29 #include "gc/shenandoah/shenandoahHeuristics.hpp" 30 #include "gc/shenandoah/shenandoahRuntime.hpp" 31 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 32 #include "interpreter/interpreter.hpp" 33 #include "interpreter/interp_masm.hpp" 34 #include "runtime/sharedRuntime.hpp" 35 #include "runtime/thread.hpp" 36 #include "utilities/macros.hpp" 37 #ifdef COMPILER1 38 #include "c1/c1_LIRAssembler.hpp" 39 #include "c1/c1_MacroAssembler.hpp" 40 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" 41 #endif 42 43 #define __ masm-> 44 45 address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; 46 47 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 48 Register src, Register dst, Register count) { 49 50 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; 51 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; 52 bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); 53 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 54 55 if (type == T_OBJECT || type == T_ARRAY) { 56 #ifdef _LP64 57 if (!checkcast) { 58 if (!obj_int) { 59 // Save count for barrier 60 __ movptr(r11, count); 61 } else if (disjoint) { 62 // Save dst in r11 in the disjoint case 63 __ movq(r11, dst); 64 } 65 } 66 #else 67 if (disjoint) { 68 __ mov(rdx, dst); // save 'to' 69 } 70 #endif 71 72 if (ShenandoahSATBBarrier && !dest_uninitialized) { 73 Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 74 assert_different_registers(dst, count, thread); // we don't care about src here? 75 #ifndef _LP64 76 __ push(thread); 77 __ get_thread(thread); 78 #endif 79 80 Label done; 81 // Short-circuit if count == 0. 82 __ testptr(count, count); 83 __ jcc(Assembler::zero, done); 84 85 // Avoid runtime call when not marking. 86 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 87 __ testb(gc_state, ShenandoahHeap::MARKING); 88 __ jcc(Assembler::zero, done); 89 90 __ pusha(); // push registers 91 #ifdef _LP64 92 if (count == c_rarg0) { 93 if (dst == c_rarg1) { 94 // exactly backwards!! 95 __ xchgptr(c_rarg1, c_rarg0); 96 } else { 97 __ movptr(c_rarg1, count); 98 __ movptr(c_rarg0, dst); 99 } 100 } else { 101 __ movptr(c_rarg0, dst); 102 __ movptr(c_rarg1, count); 103 } 104 if (UseCompressedOops) { 105 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), 2); 106 } else { 107 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2); 108 } 109 #else 110 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 111 dst, count); 112 #endif 113 __ popa(); 114 __ bind(done); 115 NOT_LP64(__ pop(thread);) 116 } 117 } 118 119 } 120 121 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 122 Register src, Register dst, Register count) { 123 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; 124 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; 125 bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); 126 Register tmp = rax; 127 128 if (type == T_OBJECT || type == T_ARRAY) { 129 #ifdef _LP64 130 if (!checkcast) { 131 if (!obj_int) { 132 // Save count for barrier 133 count = r11; 134 } else if (disjoint && obj_int) { 135 // Use the saved dst in the disjoint case 136 dst = r11; 137 } 138 } else { 139 tmp = rscratch1; 140 } 141 #else 142 if (disjoint) { 143 __ mov(dst, rdx); // restore 'to' 144 } 145 #endif 146 147 Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 148 assert_different_registers(dst, thread); // do we care about src at all here? 149 150 #ifndef _LP64 151 __ push(thread); 152 __ get_thread(thread); 153 #endif 154 155 // Short-circuit if count == 0. 156 Label done; 157 __ testptr(count, count); 158 __ jcc(Assembler::zero, done); 159 160 // Skip runtime call if no forwarded objects. 161 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 162 __ testb(gc_state, ShenandoahHeap::UPDATEREFS); 163 __ jcc(Assembler::zero, done); 164 165 __ pusha(); // push registers (overkill) 166 #ifdef _LP64 167 if (c_rarg0 == count) { // On win64 c_rarg0 == rcx 168 assert_different_registers(c_rarg1, dst); 169 __ mov(c_rarg1, count); 170 __ mov(c_rarg0, dst); 171 } else { 172 assert_different_registers(c_rarg0, count); 173 __ mov(c_rarg0, dst); 174 __ mov(c_rarg1, count); 175 } 176 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 2); 177 #else 178 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 179 dst, count); 180 #endif 181 __ popa(); 182 183 __ bind(done); 184 NOT_LP64(__ pop(thread);) 185 } 186 } 187 188 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, 189 Register obj, 190 Register pre_val, 191 Register thread, 192 Register tmp, 193 bool tosca_live, 194 bool expand_call) { 195 196 if (ShenandoahSATBBarrier) { 197 satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); 198 } 199 } 200 201 void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, 202 Register obj, 203 Register pre_val, 204 Register thread, 205 Register tmp, 206 bool tosca_live, 207 bool expand_call) { 208 // If expand_call is true then we expand the call_VM_leaf macro 209 // directly to skip generating the check by 210 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 211 212 #ifdef _LP64 213 assert(thread == r15_thread, "must be"); 214 #endif // _LP64 215 216 Label done; 217 Label runtime; 218 219 assert(pre_val != noreg, "check this code"); 220 221 if (obj != noreg) { 222 assert_different_registers(obj, pre_val, tmp); 223 assert(pre_val != rax, "check this code"); 224 } 225 226 Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); 227 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 228 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 229 230 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 231 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 232 __ jcc(Assembler::zero, done); 233 234 // Do we need to load the previous value? 235 if (obj != noreg) { 236 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 237 } 238 239 // Is the previous value null? 240 __ cmpptr(pre_val, (int32_t) NULL_WORD); 241 __ jcc(Assembler::equal, done); 242 243 // Can we store original value in the thread's buffer? 244 // Is index == 0? 245 // (The index field is typed as size_t.) 246 247 __ movptr(tmp, index); // tmp := *index_adr 248 __ cmpptr(tmp, 0); // tmp == 0? 249 __ jcc(Assembler::equal, runtime); // If yes, goto runtime 250 251 __ subptr(tmp, wordSize); // tmp := tmp - wordSize 252 __ movptr(index, tmp); // *index_adr := tmp 253 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr 254 255 // Record the previous value 256 __ movptr(Address(tmp, 0), pre_val); 257 __ jmp(done); 258 259 __ bind(runtime); 260 // save the live input values 261 if(tosca_live) __ push(rax); 262 263 if (obj != noreg && obj != rax) 264 __ push(obj); 265 266 if (pre_val != rax) 267 __ push(pre_val); 268 269 // Calling the runtime using the regular call_VM_leaf mechanism generates 270 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 271 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 272 // 273 // If we care generating the pre-barrier without a frame (e.g. in the 274 // intrinsified Reference.get() routine) then ebp might be pointing to 275 // the caller frame and so this check will most likely fail at runtime. 276 // 277 // Expanding the call directly bypasses the generation of the check. 278 // So when we do not have have a full interpreter frame on the stack 279 // expand_call should be passed true. 280 281 NOT_LP64( __ push(thread); ) 282 283 #ifdef _LP64 284 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should 285 // pre_val be c_rarg1 (where the call prologue would copy thread argument). 286 // Note: this should not accidentally smash thread, because thread is always r15. 287 assert(thread != c_rarg0, "smashed arg"); 288 if (c_rarg0 != pre_val) { 289 __ mov(c_rarg0, pre_val); 290 } 291 #endif 292 293 if (expand_call) { 294 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 295 #ifdef _LP64 296 if (c_rarg1 != thread) { 297 __ mov(c_rarg1, thread); 298 } 299 // Already moved pre_val into c_rarg0 above 300 #else 301 __ push(thread); 302 __ push(pre_val); 303 #endif 304 __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2); 305 } else { 306 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); 307 } 308 309 NOT_LP64( __ pop(thread); ) 310 311 // save the live input values 312 if (pre_val != rax) 313 __ pop(pre_val); 314 315 if (obj != noreg && obj != rax) 316 __ pop(obj); 317 318 if(tosca_live) __ pop(rax); 319 320 __ bind(done); 321 } 322 323 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { 324 assert(ShenandoahCASBarrier, "should be enabled"); 325 Label is_null; 326 __ testptr(dst, dst); 327 __ jcc(Assembler::zero, is_null); 328 resolve_forward_pointer_not_null(masm, dst, tmp); 329 __ bind(is_null); 330 } 331 332 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { 333 assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled"); 334 // The below loads the mark word, checks if the lowest two bits are 335 // set, and if so, clear the lowest two bits and copy the result 336 // to dst. Otherwise it leaves dst alone. 337 // Implementing this is surprisingly awkward. I do it here by: 338 // - Inverting the mark word 339 // - Test lowest two bits == 0 340 // - If so, set the lowest two bits 341 // - Invert the result back, and copy to dst 342 343 bool borrow_reg = (tmp == noreg); 344 if (borrow_reg) { 345 // No free registers available. Make one useful. 346 tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx); 347 __ push(tmp); 348 } 349 350 Label done; 351 __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); 352 __ notptr(tmp); 353 __ testb(tmp, markOopDesc::marked_value); 354 __ jccb(Assembler::notZero, done); 355 __ orptr(tmp, markOopDesc::marked_value); 356 __ notptr(tmp); 357 __ mov(dst, tmp); 358 __ bind(done); 359 360 if (borrow_reg) { 361 __ pop(tmp); 362 } 363 } 364 365 366 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) { 367 assert(ShenandoahLoadRefBarrier, "Should be enabled"); 368 369 Label done; 370 371 #ifdef _LP64 372 Register thread = r15_thread; 373 #else 374 Register thread = rcx; 375 if (thread == dst) { 376 thread = rbx; 377 } 378 __ push(thread); 379 __ get_thread(thread); 380 #endif 381 assert_different_registers(dst, thread); 382 383 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 384 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); 385 __ jccb(Assembler::zero, done); 386 387 if (dst != rax) { 388 __ xchgptr(dst, rax); // Move obj into rax and save rax into obj. 389 } 390 391 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); 392 393 if (dst != rax) { 394 __ xchgptr(rax, dst); // Swap back obj with rax. 395 } 396 397 __ bind(done); 398 399 #ifndef _LP64 400 __ pop(thread); 401 #endif 402 } 403 404 void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst) { 405 if (!ShenandoahLoadRefBarrier) { 406 return; 407 } 408 409 Label done; 410 Label not_null; 411 Label slow_path; 412 413 // null check 414 __ testptr(dst, dst); 415 __ jcc(Assembler::notZero, not_null); 416 __ jmp(done); 417 __ bind(not_null); 418 419 420 #ifdef _LP64 421 Register thread = r15_thread; 422 #else 423 Register thread = rcx; 424 if (thread == dst) { 425 thread = rbx; 426 } 427 __ push(thread); 428 __ get_thread(thread); 429 #endif 430 assert_different_registers(dst, thread); 431 432 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 433 __ testb(gc_state, ShenandoahHeap::EVACUATION); 434 #ifndef _LP64 435 __ pop(thread); 436 #endif 437 __ jccb(Assembler::notZero, slow_path); 438 __ jmp(done); 439 __ bind(slow_path); 440 441 if (dst != rax) { 442 __ xchgptr(dst, rax); // Move obj into rax and save rax into obj. 443 } 444 __ push(rcx); 445 __ push(rdx); 446 __ push(rdi); 447 __ push(rsi); 448 #ifdef _LP64 449 __ push(r8); 450 __ push(r9); 451 __ push(r10); 452 __ push(r11); 453 __ push(r12); 454 __ push(r13); 455 __ push(r14); 456 __ push(r15); 457 #endif 458 459 __ movptr(rdi, rax); 460 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rdi); 461 462 #ifdef _LP64 463 __ pop(r15); 464 __ pop(r14); 465 __ pop(r13); 466 __ pop(r12); 467 __ pop(r11); 468 __ pop(r10); 469 __ pop(r9); 470 __ pop(r8); 471 #endif 472 __ pop(rsi); 473 __ pop(rdi); 474 __ pop(rdx); 475 __ pop(rcx); 476 477 if (dst != rax) { 478 __ xchgptr(rax, dst); // Swap back obj with rax. 479 } 480 481 __ bind(done); 482 } 483 484 void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { 485 if (ShenandoahStoreValEnqueueBarrier) { 486 storeval_barrier_impl(masm, dst, tmp); 487 } 488 } 489 490 void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) { 491 assert(ShenandoahStoreValEnqueueBarrier, "should be enabled"); 492 493 if (dst == noreg) return; 494 495 if (ShenandoahStoreValEnqueueBarrier) { 496 // The set of registers to be saved+restored is the same as in the write-barrier above. 497 // Those are the commonly used registers in the interpreter. 498 __ pusha(); 499 // __ push_callee_saved_registers(); 500 __ subptr(rsp, 2 * Interpreter::stackElementSize); 501 __ movdbl(Address(rsp, 0), xmm0); 502 503 #ifdef _LP64 504 Register thread = r15_thread; 505 #else 506 Register thread = rcx; 507 if (thread == dst || thread == tmp) { 508 thread = rdi; 509 } 510 if (thread == dst || thread == tmp) { 511 thread = rbx; 512 } 513 __ get_thread(thread); 514 #endif 515 assert_different_registers(dst, tmp, thread); 516 517 satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false); 518 __ movdbl(xmm0, Address(rsp, 0)); 519 __ addptr(rsp, 2 * Interpreter::stackElementSize); 520 //__ pop_callee_saved_registers(); 521 __ popa(); 522 } 523 } 524 525 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) { 526 if (ShenandoahLoadRefBarrier) { 527 Label done; 528 __ testptr(dst, dst); 529 __ jcc(Assembler::zero, done); 530 load_reference_barrier_not_null(masm, dst); 531 __ bind(done); 532 } 533 } 534 535 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 536 Register dst, Address src, Register tmp1, Register tmp_thread) { 537 bool on_oop = type == T_OBJECT || type == T_ARRAY; 538 bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; 539 bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; 540 bool not_in_heap = (decorators & IN_NATIVE) != 0; 541 bool on_reference = on_weak || on_phantom; 542 bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0; 543 544 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); 545 if (on_oop) { 546 if (not_in_heap) { 547 if (ShenandoahHeap::heap()->is_traversal_mode()) { 548 load_reference_barrier(masm, dst); 549 keep_alive = true; 550 } else { 551 load_reference_barrier_native(masm, dst); 552 } 553 } else { 554 load_reference_barrier(masm, dst); 555 } 556 557 if (ShenandoahKeepAliveBarrier && on_reference && keep_alive) { 558 const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); 559 assert_different_registers(dst, tmp1, tmp_thread); 560 NOT_LP64(__ get_thread(thread)); 561 // Generate the SATB pre-barrier code to log the value of 562 // the referent field in an SATB buffer. 563 shenandoah_write_barrier_pre(masm /* masm */, 564 noreg /* obj */, 565 dst /* pre_val */, 566 thread /* thread */, 567 tmp1 /* tmp */, 568 true /* tosca_live */, 569 true /* expand_call */); 570 } 571 } 572 } 573 574 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 575 Address dst, Register val, Register tmp1, Register tmp2) { 576 577 bool on_oop = type == T_OBJECT || type == T_ARRAY; 578 bool in_heap = (decorators & IN_HEAP) != 0; 579 bool as_normal = (decorators & AS_NORMAL) != 0; 580 if (on_oop && in_heap) { 581 bool needs_pre_barrier = as_normal; 582 583 Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi); 584 Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); 585 // flatten object address if needed 586 // We do it regardless of precise because we need the registers 587 if (dst.index() == noreg && dst.disp() == 0) { 588 if (dst.base() != tmp1) { 589 __ movptr(tmp1, dst.base()); 590 } 591 } else { 592 __ lea(tmp1, dst); 593 } 594 595 assert_different_registers(val, tmp1, tmp2, tmp3, rthread); 596 597 #ifndef _LP64 598 __ get_thread(rthread); 599 InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm); 600 imasm->save_bcp(); 601 #endif 602 603 if (needs_pre_barrier) { 604 shenandoah_write_barrier_pre(masm /*masm*/, 605 tmp1 /* obj */, 606 tmp2 /* pre_val */, 607 rthread /* thread */, 608 tmp3 /* tmp */, 609 val != noreg /* tosca_live */, 610 false /* expand_call */); 611 } 612 if (val == noreg) { 613 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 614 } else { 615 storeval_barrier(masm, val, tmp3); 616 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); 617 } 618 NOT_LP64(imasm->restore_bcp()); 619 } else { 620 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); 621 } 622 } 623 624 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, 625 Register obj, Register tmp, Label& slowpath) { 626 Label done; 627 // Resolve jobject 628 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); 629 630 // Check for null. 631 __ testptr(obj, obj); 632 __ jcc(Assembler::zero, done); 633 634 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); 635 __ testb(gc_state, ShenandoahHeap::EVACUATION); 636 __ jccb(Assembler::notZero, slowpath); 637 __ bind(done); 638 } 639 640 // Special Shenandoah CAS implementation that handles false negatives 641 // due to concurrent evacuation. 642 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, 643 Register res, Address addr, Register oldval, Register newval, 644 bool exchange, Register tmp1, Register tmp2) { 645 assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); 646 assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); 647 648 Label retry, done; 649 650 // Remember oldval for retry logic below 651 #ifdef _LP64 652 if (UseCompressedOops) { 653 __ movl(tmp1, oldval); 654 } else 655 #endif 656 { 657 __ movptr(tmp1, oldval); 658 } 659 660 // Step 1. Try to CAS with given arguments. If successful, then we are done, 661 // and can safely return. 662 if (os::is_MP()) __ lock(); 663 #ifdef _LP64 664 if (UseCompressedOops) { 665 __ cmpxchgl(newval, addr); 666 } else 667 #endif 668 { 669 __ cmpxchgptr(newval, addr); 670 } 671 __ jcc(Assembler::equal, done, true); 672 673 // Step 2. CAS had failed. This may be a false negative. 674 // 675 // The trouble comes when we compare the to-space pointer with the from-space 676 // pointer to the same object. To resolve this, it will suffice to resolve both 677 // oldval and the value from memory -- this will give both to-space pointers. 678 // If they mismatch, then it was a legitimate failure. 679 // 680 #ifdef _LP64 681 if (UseCompressedOops) { 682 __ decode_heap_oop(tmp1); 683 } 684 #endif 685 resolve_forward_pointer(masm, tmp1); 686 687 #ifdef _LP64 688 if (UseCompressedOops) { 689 __ movl(tmp2, oldval); 690 __ decode_heap_oop(tmp2); 691 } else 692 #endif 693 { 694 __ movptr(tmp2, oldval); 695 } 696 resolve_forward_pointer(masm, tmp2); 697 698 __ cmpptr(tmp1, tmp2); 699 __ jcc(Assembler::notEqual, done, true); 700 701 // Step 3. Try to CAS again with resolved to-space pointers. 702 // 703 // Corner case: it may happen that somebody stored the from-space pointer 704 // to memory while we were preparing for retry. Therefore, we can fail again 705 // on retry, and so need to do this in loop, always resolving the failure 706 // witness. 707 __ bind(retry); 708 if (os::is_MP()) __ lock(); 709 #ifdef _LP64 710 if (UseCompressedOops) { 711 __ cmpxchgl(newval, addr); 712 } else 713 #endif 714 { 715 __ cmpxchgptr(newval, addr); 716 } 717 __ jcc(Assembler::equal, done, true); 718 719 #ifdef _LP64 720 if (UseCompressedOops) { 721 __ movl(tmp2, oldval); 722 __ decode_heap_oop(tmp2); 723 } else 724 #endif 725 { 726 __ movptr(tmp2, oldval); 727 } 728 resolve_forward_pointer(masm, tmp2); 729 730 __ cmpptr(tmp1, tmp2); 731 __ jcc(Assembler::equal, retry, true); 732 733 // Step 4. If we need a boolean result out of CAS, check the flag again, 734 // and promote the result. Note that we handle the flag from both the CAS 735 // itself and from the retry loop. 736 __ bind(done); 737 if (!exchange) { 738 assert(res != NULL, "need result register"); 739 #ifdef _LP64 740 __ setb(Assembler::equal, res); 741 __ movzbl(res, res); 742 #else 743 // Need something else to clean the result, because some registers 744 // do not have byte encoding that movzbl wants. Cannot do the xor first, 745 // because it modifies the flags. 746 Label res_non_zero; 747 __ movptr(res, 1); 748 __ jcc(Assembler::equal, res_non_zero, true); 749 __ xorptr(res, res); 750 __ bind(res_non_zero); 751 #endif 752 } 753 } 754 755 void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) { 756 int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); 757 if (UseAVX > 2) { 758 num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); 759 } 760 761 if (UseSSE == 1) { 762 __ subptr(rsp, sizeof(jdouble)*8); 763 for (int n = 0; n < 8; n++) { 764 __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n)); 765 } 766 } else if (UseSSE >= 2) { 767 if (UseAVX > 2) { 768 __ push(rbx); 769 __ movl(rbx, 0xffff); 770 __ kmovwl(k1, rbx); 771 __ pop(rbx); 772 } 773 #ifdef COMPILER2 774 if (MaxVectorSize > 16) { 775 if(UseAVX > 2) { 776 // Save upper half of ZMM registers 777 __ subptr(rsp, 32*num_xmm_regs); 778 for (int n = 0; n < num_xmm_regs; n++) { 779 __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); 780 } 781 } 782 assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); 783 // Save upper half of YMM registers 784 __ subptr(rsp, 16*num_xmm_regs); 785 for (int n = 0; n < num_xmm_regs; n++) { 786 __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); 787 } 788 } 789 #endif 790 // Save whole 128bit (16 bytes) XMM registers 791 __ subptr(rsp, 16*num_xmm_regs); 792 #ifdef _LP64 793 if (VM_Version::supports_evex()) { 794 for (int n = 0; n < num_xmm_regs; n++) { 795 __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0); 796 } 797 } else { 798 for (int n = 0; n < num_xmm_regs; n++) { 799 __ movdqu(Address(rsp, n*16), as_XMMRegister(n)); 800 } 801 } 802 #else 803 for (int n = 0; n < num_xmm_regs; n++) { 804 __ movdqu(Address(rsp, n*16), as_XMMRegister(n)); 805 } 806 #endif 807 } 808 } 809 810 void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) { 811 int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); 812 if (UseAVX > 2) { 813 num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); 814 } 815 if (UseSSE == 1) { 816 for (int n = 0; n < 8; n++) { 817 __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble))); 818 } 819 __ addptr(rsp, sizeof(jdouble)*8); 820 } else if (UseSSE >= 2) { 821 // Restore whole 128bit (16 bytes) XMM registers 822 #ifdef _LP64 823 if (VM_Version::supports_evex()) { 824 for (int n = 0; n < num_xmm_regs; n++) { 825 __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0); 826 } 827 } else { 828 for (int n = 0; n < num_xmm_regs; n++) { 829 __ movdqu(as_XMMRegister(n), Address(rsp, n*16)); 830 } 831 } 832 #else 833 for (int n = 0; n < num_xmm_regs; n++) { 834 __ movdqu(as_XMMRegister(n), Address(rsp, n*16)); 835 } 836 #endif 837 __ addptr(rsp, 16*num_xmm_regs); 838 839 #ifdef COMPILER2 840 if (MaxVectorSize > 16) { 841 // Restore upper half of YMM registers. 842 for (int n = 0; n < num_xmm_regs; n++) { 843 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16)); 844 } 845 __ addptr(rsp, 16*num_xmm_regs); 846 if (UseAVX > 2) { 847 for (int n = 0; n < num_xmm_regs; n++) { 848 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32)); 849 } 850 __ addptr(rsp, 32*num_xmm_regs); 851 } 852 } 853 #endif 854 } 855 } 856 857 #undef __ 858 859 #ifdef COMPILER1 860 861 #define __ ce->masm()-> 862 863 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { 864 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 865 // At this point we know that marking is in progress. 866 // If do_load() is true then we have to emit the 867 // load of the previous value; otherwise it has already 868 // been loaded into _pre_val. 869 870 __ bind(*stub->entry()); 871 assert(stub->pre_val()->is_register(), "Precondition."); 872 873 Register pre_val_reg = stub->pre_val()->as_register(); 874 875 if (stub->do_load()) { 876 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); 877 } 878 879 __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); 880 __ jcc(Assembler::equal, *stub->continuation()); 881 ce->store_parameter(stub->pre_val()->as_register(), 0); 882 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); 883 __ jmp(*stub->continuation()); 884 885 } 886 887 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { 888 __ bind(*stub->entry()); 889 890 Label done; 891 Register obj = stub->obj()->as_register(); 892 Register res = stub->result()->as_register(); 893 894 if (res != obj) { 895 __ mov(res, obj); 896 } 897 898 // Check for null. 899 __ testptr(res, res); 900 __ jcc(Assembler::zero, done); 901 902 load_reference_barrier_not_null(ce->masm(), res); 903 904 __ bind(done); 905 __ jmp(*stub->continuation()); 906 } 907 908 #undef __ 909 910 #define __ sasm-> 911 912 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { 913 __ prologue("shenandoah_pre_barrier", false); 914 // arg0 : previous value of memory 915 916 __ push(rax); 917 __ push(rdx); 918 919 const Register pre_val = rax; 920 const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); 921 const Register tmp = rdx; 922 923 NOT_LP64(__ get_thread(thread);) 924 925 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); 926 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); 927 928 Label done; 929 Label runtime; 930 931 // Is SATB still active? 932 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); 933 __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL); 934 __ jcc(Assembler::zero, done); 935 936 // Can we store original value in the thread's buffer? 937 938 __ movptr(tmp, queue_index); 939 __ testptr(tmp, tmp); 940 __ jcc(Assembler::zero, runtime); 941 __ subptr(tmp, wordSize); 942 __ movptr(queue_index, tmp); 943 __ addptr(tmp, buffer); 944 945 // prev_val (rax) 946 __ load_parameter(0, pre_val); 947 __ movptr(Address(tmp, 0), pre_val); 948 __ jmp(done); 949 950 __ bind(runtime); 951 952 __ save_live_registers_no_oop_map(true); 953 954 // load the pre-value 955 __ load_parameter(0, rcx); 956 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread); 957 958 __ restore_live_registers(true); 959 960 __ bind(done); 961 962 __ pop(rdx); 963 __ pop(rax); 964 965 __ epilogue(); 966 } 967 968 #undef __ 969 970 #endif // COMPILER1 971 972 address ShenandoahBarrierSetAssembler::shenandoah_lrb() { 973 assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); 974 return _shenandoah_lrb; 975 } 976 977 #define __ cgen->assembler()-> 978 979 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { 980 __ align(CodeEntryAlignment); 981 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); 982 address start = __ pc(); 983 984 Label resolve_oop, slow_path; 985 986 // We use RDI, which also serves as argument register for slow call. 987 // RAX always holds the src object ptr, except after the slow call and 988 // the cmpxchg, then it holds the result. R8/RBX is used as temporary register. 989 990 Register tmp1 = rdi; 991 Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx); 992 993 __ push(tmp1); 994 __ push(tmp2); 995 996 // Check for object being in the collection set. 997 // TODO: Can we use only 1 register here? 998 // The source object arrives here in rax. 999 // live: rax 1000 // live: tmp1 1001 __ mov(tmp1, rax); 1002 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); 1003 // live: tmp2 1004 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); 1005 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); 1006 // unlive: tmp1 1007 __ testbool(tmp2); 1008 // unlive: tmp2 1009 __ jccb(Assembler::notZero, resolve_oop); 1010 1011 __ pop(tmp2); 1012 __ pop(tmp1); 1013 __ ret(0); 1014 1015 __ bind(resolve_oop); 1016 1017 __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes())); 1018 // Test if both lowest bits are set. We trick it by negating the bits 1019 // then test for both bits clear. 1020 __ notptr(tmp2); 1021 __ testb(tmp2, markOopDesc::marked_value); 1022 __ jccb(Assembler::notZero, slow_path); 1023 // Clear both lower bits. It's still inverted, so set them, and then invert back. 1024 __ orptr(tmp2, markOopDesc::marked_value); 1025 __ notptr(tmp2); 1026 // At this point, tmp2 contains the decoded forwarding pointer. 1027 __ mov(rax, tmp2); 1028 1029 __ pop(tmp2); 1030 __ pop(tmp1); 1031 __ ret(0); 1032 1033 __ bind(slow_path); 1034 1035 __ push(rcx); 1036 __ push(rdx); 1037 __ push(rdi); 1038 __ push(rsi); 1039 #ifdef _LP64 1040 __ push(r8); 1041 __ push(r9); 1042 __ push(r10); 1043 __ push(r11); 1044 __ push(r12); 1045 __ push(r13); 1046 __ push(r14); 1047 __ push(r15); 1048 #endif 1049 1050 save_vector_registers(cgen->assembler()); 1051 __ movptr(rdi, rax); 1052 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi); 1053 restore_vector_registers(cgen->assembler()); 1054 1055 #ifdef _LP64 1056 __ pop(r15); 1057 __ pop(r14); 1058 __ pop(r13); 1059 __ pop(r12); 1060 __ pop(r11); 1061 __ pop(r10); 1062 __ pop(r9); 1063 __ pop(r8); 1064 #endif 1065 __ pop(rsi); 1066 __ pop(rdi); 1067 __ pop(rdx); 1068 __ pop(rcx); 1069 1070 __ pop(tmp2); 1071 __ pop(tmp1); 1072 __ ret(0); 1073 1074 return start; 1075 } 1076 1077 #undef __ 1078 1079 void ShenandoahBarrierSetAssembler::barrier_stubs_init() { 1080 if (ShenandoahLoadRefBarrier) { 1081 int stub_code_size = 4096; 1082 ResourceMark rm; 1083 BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); 1084 CodeBuffer buf(bb); 1085 StubCodeGenerator cgen(&buf); 1086 _shenandoah_lrb = generate_shenandoah_lrb(&cgen); 1087 } 1088 }