1 /*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
279 mov(L5, G4);
280 restore(O0, 0, G2_thread);
281 }
282
283 static Thread* verify_thread_subroutine(Thread* gthread_value) {
284 Thread* correct_value = Thread::current();
285 guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
286 return correct_value;
287 }
288
// Debug-only (-XX:+VerifyThread) check that G2_thread still holds the
// current Thread*. Saves the live G registers into locals of a temporary
// frame, calls verify_thread_subroutine(G2_thread) out of line, then
// restores the G registers; restore() moves the subroutine's return value
// (in O0) back into G2_thread.
void MacroAssembler::verify_thread() {
  if (VerifyThread) {
    // NOTE: this chops off the heads of the 64-bit O registers.
    // make sure G2_thread contains the right value
    save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
    mov(G1, L1);                // avoid clobbering G1
    // G2 saved below
    mov(G3, L3);                // avoid clobbering G3
    mov(G4, L4);                // avoid clobbering G4
    mov(G5_method, L5);         // avoid clobbering G5_method
#if defined(COMPILER2) && !defined(_LP64)
    // Save & restore possible 64-bit Long arguments in G-regs
    srlx(G1,32,L0);
    srlx(G4,32,L6);
#endif
    call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
    delayed()->mov(G2_thread, O0);   // delay slot: pass the cached thread as the argument

    mov(L1, G1);                // Restore G1
    // G2 restored below
    mov(L3, G3);                // restore G3
    mov(L4, G4);                // restore G4
    mov(L5, G5_method);         // restore G5_method
#if defined(COMPILER2) && !defined(_LP64)
    // Save & restore possible 64-bit Long arguments in G-regs
    sllx(L0,32,G2);             // Move old high G1 bits high in G2
    srl(G1, 0,G1);              // Clear current high G1 bits
    or3 (G1,G2,G1);             // Recover 64-bit G1
    sllx(L6,32,G2);             // Move old high G4 bits high in G2
    srl(G4, 0,G4);              // Clear current high G4 bits
    or3 (G4,G2,G4);             // Recover 64-bit G4
#endif
    restore(O0, 0, G2_thread);  // pop the frame; G2_thread := verified thread (from O0)
  }
}
324
325
// Cache G2_thread in 'thread_cache' (which must survive a register-window
// save, hence the local-or-in assert) before code that may clobber G2.
// Under -XX:+VerifyThread, G2_thread is then deliberately smashed with a
// recognizable marker so any stale use of it is caught early.
void MacroAssembler::save_thread(const Register thread_cache) {
  verify_thread();
  if (thread_cache->is_valid()) {
    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
    mov(G2_thread, thread_cache);
  }
  if (VerifyThread) {
    // smash G2_thread, as if the VM were about to anyway
    set(0x67676767, G2_thread);
  }
}
337
338
339 void MacroAssembler::restore_thread(const Register thread_cache) {
340 if (thread_cache->is_valid()) {
370
371 // Verify that flags was zeroed on return to Java
372 Label FlagsOk;
373 ld(flags, L0);
374 tst(L0);
375 br(Assembler::zero, false, Assembler::pt, FlagsOk);
376 delayed() -> restore();
377 STOP("flags not zeroed before leaving Java");
378 bind(FlagsOk);
379 #endif /* ASSERT */
380 //
381 // When returning from calling out from Java mode the frame anchor's last_Java_pc
382 // will always be set to NULL. It is set here so that if we are doing a call to
383 // native (not VM) that we capture the known pc and don't have to rely on the
384 // native call having a standard frame linkage where we can find the pc.
385
386 if (last_Java_pc->is_valid()) {
387 st_ptr(last_Java_pc, pc_addr);
388 }
389
390 #ifdef _LP64
391 #ifdef ASSERT
392 // Make sure that we have an odd stack
393 Label StackOk;
394 andcc(last_java_sp, 0x01, G0);
395 br(Assembler::notZero, false, Assembler::pt, StackOk);
396 delayed()->nop();
397 STOP("Stack Not Biased in set_last_Java_frame");
398 bind(StackOk);
399 #endif // ASSERT
400 assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
401 add( last_java_sp, STACK_BIAS, G4_scratch );
402 st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
403 #else
404 st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset());
405 #endif // _LP64
406 }
407
408 void MacroAssembler::reset_last_Java_frame(void) {
409 assert_not_delayed();
410
411 Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
412 Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
413 Address flags (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
414
415 #ifdef ASSERT
416 // check that it WAS previously set
417 save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod to helper frame for -Xprof
418 ld_ptr(sp_addr, L0);
419 tst(L0);
420 breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
421 restore();
422 #endif // ASSERT
423
424 st_ptr(G0, sp_addr);
425 // Always return last_Java_pc to zero
641 breakpoint_trap(notZero, Assembler::ptr_cc);
642 // }
643 # endif
644
645 st_ptr(oop_result, vm_result_addr);
646 }
647
648
// Emit an inline-cache call to 'entry'. G5 (the inline-cache register) is
// loaded with the non-oop sentinel via a patchable sequence, and a
// virtual-call relocation (tagged with 'method_index') is recorded at the
// call so the site can be patched later. The delay-slot nop is optional so
// callers can fill the slot themselves when emit_delay is false.
void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index) {
  RelocationHolder rspec = virtual_call_Relocation::spec(pc(), method_index);
  patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
  relocate(rspec);
  call(entry, relocInfo::none);
  if (emit_delay) {
    delayed()->nop();
  }
}
658
// Mark the card table entry covering the address in 'obj' as dirty.
// 'obj' is clobbered: it is shifted right by card_shift to form the card
// index; 'tmp' receives the card table base, and a zero byte (G0) is
// stored at tmp + index.
void MacroAssembler::card_table_write(jbyte* byte_map_base,
                                      Register tmp, Register obj) {
#ifdef _LP64
  srlx(obj, CardTableModRefBS::card_shift, obj);
#else
  srl(obj, CardTableModRefBS::card_shift, obj);
#endif
  assert(tmp != obj, "need separate temp reg");
  set((address) byte_map_base, tmp);
  stb(G0, tmp, obj);
}
670
671
672 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
673 address save_pc;
674 int shiftcnt;
675 #ifdef _LP64
676 # ifdef CHECK_DELAY
677 assert_not_delayed((char*) "cannot put two instructions in delay slot");
678 # endif
679 v9_dep();
680 save_pc = pc();
681
682 int msb32 = (int) (addrlit.value() >> 32);
683 int lsb32 = (int) (addrlit.value());
684
685 if (msb32 == 0 && lsb32 >= 0) {
686 Assembler::sethi(lsb32, d, addrlit.rspec());
687 }
688 else if (msb32 == -1) {
689 Assembler::sethi(~lsb32, d, addrlit.rspec());
690 xor3(d, ~low10(~0), d);
691 }
692 else {
693 Assembler::sethi(msb32, d, addrlit.rspec()); // msb 22-bits
694 if (msb32 & 0x3ff) // Any bits?
695 or3(d, msb32 & 0x3ff, d); // msb 32-bits are now in lsb 32
702 else
703 shiftcnt = 12;
704 if ((lsb32 >> 10) & 0x3ff) {
705 sllx(d, shiftcnt + 10, d); // Make room for last 10 bits
706 or3(d, (lsb32 >> 10) & 0x3ff, d); // Or in next 10
707 shiftcnt = 0;
708 }
709 else
710 shiftcnt = 10;
711 sllx(d, shiftcnt + 10, d); // Shift leaving disp field 0'd
712 }
713 else
714 sllx(d, 32, d);
715 }
716 // Pad out the instruction sequence so it can be patched later.
717 if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
718 addrlit.rtype() != relocInfo::runtime_call_type)) {
719 while (pc() < (save_pc + (7 * BytesPerInstWord)))
720 nop();
721 }
722 #else
723 Assembler::sethi(addrlit.value(), d, addrlit.rspec());
724 #endif
725 }
726
727
// Load the address literal into d. The emitted sequence is not forced to be
// patchable, so internal_sethi may choose a shorter form.
void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, false);  /* ForceRelocatable = false */
}
731
732
// Like sethi(), but forces a relocatable sequence (padded with nops to a
// fixed length by internal_sethi) so the value can be patched later.
void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, true);  /* ForceRelocatable = true */
}
736
737
738 int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
739 #ifdef _LP64
740 if (worst_case) return 7;
741 intptr_t iaddr = (intptr_t) a;
742 int msb32 = (int) (iaddr >> 32);
743 int lsb32 = (int) (iaddr);
744 int count;
745 if (msb32 == 0 && lsb32 >= 0)
746 count = 1;
747 else if (msb32 == -1)
748 count = 2;
749 else {
750 count = 2;
751 if (msb32 & 0x3ff)
752 count++;
753 if (lsb32 & 0xFFFFFC00 ) {
754 if ((lsb32 >> 20) & 0xfff) count += 2;
755 if ((lsb32 >> 10) & 0x3ff) count += 2;
756 }
757 }
758 return count;
759 #else
760 return 1;
761 #endif
762 }
763
// Upper bound on the instruction count of a set(): the worst-case sethi
// sequence plus one additional instruction for the low-order bits.
int MacroAssembler::worst_case_insts_for_set() {
  return insts_for_sethi(NULL, true) + 1;
}
767
768
769 // Keep in sync with MacroAssembler::insts_for_internal_set
770 void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
771 intptr_t value = addrlit.value();
772
773 if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
774 // can optimize
775 if (-4096 <= value && value <= 4095) {
776 or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
777 return;
778 }
779 if (inv_hi22(hi22(value)) == value) {
780 sethi(addrlit, d);
781 return;
1471 BREAKPOINT;
1472 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
1473 }
1474 else {
1475 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1476 }
1477 assert(false, "DEBUG MESSAGE: %s", msg);
1478 }
1479
1480
// Rresult := max(Rparam_words - n_register_parameters, 0): the number of
// outgoing parameter words that do not fit in registers and spill to memory.
void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
  subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
  Label no_extras;
  br( negative, true, pt, no_extras ); // if neg, clear reg
  delayed()->set(0, Rresult); // annulled branch: the delay-slot set executes only if taken
  bind( no_extras );
}
1488
1489
// Rresult := total frame size in bytes for a frame with Rextra_words extra
// memory-parameter words: add the fixed sp offset, force the word count
// even with bclr(1, ...) (doubleword alignment), then scale to bytes.
void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
#ifdef _LP64
  add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
#else
  add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult);  // +1 so bclr rounds up on 32-bit
#endif
  bclr(1, Rresult);
  sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes
}
1499
1500
// Compute the frame size (see calc_frame_size) and push a new register
// window/frame of that size; the size is negated because SPARC save()
// adjusts SP by adding its (negative) operand.
void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
  calc_frame_size(Rextra_words, Rresult);
  neg(Rresult);
  save(SP, Rresult, SP);
}
1506
1507
1508 // ---------------------------------------------------------
1509 Assembler::RCondition cond2rcond(Assembler::Condition c) {
1510 switch (c) {
1511 /*case zero: */
1512 case Assembler::equal: return Assembler::rc_z;
1513 case Assembler::lessEqual: return Assembler::rc_lez;
1514 case Assembler::less: return Assembler::rc_lz;
1515 /*case notZero:*/
1516 case Assembler::notEqual: return Assembler::rc_nz;
1517 case Assembler::greater: return Assembler::rc_gz;
1518 case Assembler::greaterEqual: return Assembler::rc_gez;
1519 }
1520 ShouldNotReachHere();
1521 return Assembler::rc_z;
1522 }
1523
1524 // compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS
void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
  // tst() sets the integer condition codes from s1; the branch is emitted
  // without a delay-slot instruction -- the caller supplies it.
  tst(s1);
  br (c, a, p, L);
}
1529
1530 // Compares a pointer register with zero and branches on null.
1531 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();
#ifdef _LP64
  // 64-bit: single branch-on-register-value tests the full pointer.
  bpr( rc_z, a, p, s1, L );
#else
  // 32-bit: compare against zero, then branch on the condition codes.
  tst(s1);
  br ( zero, a, p, L );
#endif
}
1541
void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();
#ifdef _LP64
  // 64-bit: single branch-on-register-value tests the full pointer.
  bpr( rc_nz, a, p, s1, L );
#else
  // 32-bit: compare against zero, then branch on the condition codes.
  tst(s1);
  br ( notZero, a, p, L );
#endif
}
1551
1552 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
1553
1554 // Compare integer (32 bit) values (icc only).
// Compare two 32-bit register values (icc) and branch: uses the compact
// cbcond instruction (no delay slot) when use_cbcond() allows it, otherwise
// a cmp/br pair with a nop filling the delay slot.
void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
                                      Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(c, icc, s1, s2, L);
  } else {
    cmp(s1, s2);
    br(c, false, p, L);
    delayed()->nop();
  }
}
1566
1567 // Compare integer (32 bit) values (icc only).
1568 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
1569 Predict p, Label& L) {
1845 // Rin and Rout are the same and should not be reversed.
1846
1847 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
1848 srl(Rin_high, Rcount, Rout_high ); // high half
1849 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
1850 if (Rcount == Rout_low) {
1851 srl(Rin_low, Rcount, Rout_low);
1852 }
1853 ba(done);
1854 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
1855
1856 // shift >= 32 bits, Ralt_count = Rcount-32
1857 bind(big_shift);
1858
1859 srl(Rin_high, Ralt_count, Rout_low);
1860 clr(Rout_high);
1861
1862 bind( done );
1863 }
1864
#ifdef _LP64
// 64-bit three-way compare: Rresult := -1, 0, or 1 as Ra <, ==, or > Rb
// (signed, xcc). Starts from -1 and conditionally overwrites with 0 / 1.
void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
  cmp(Ra, Rb);
  mov(-1, Rresult);
  movcc(equal, false, xcc, 0, Rresult);
  movcc(greater, false, xcc, 1, Rresult);
}
#endif
1873
1874
1875 void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
1876 switch (size_in_bytes) {
1877 case 8: ld_long(src, dst); break;
1878 case 4: ld( src, dst); break;
1879 case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break;
1880 case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break;
1881 default: ShouldNotReachHere();
1882 }
1883 }
1884
1885 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
1886 switch (size_in_bytes) {
1887 case 8: st_long(src, dst); break;
1888 case 4: st( src, dst); break;
1889 case 2: sth( src, dst); break;
1890 case 1: stb( src, dst); break;
1891 default: ShouldNotReachHere();
1892 }
2651 if (try_bias) {
2652 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2653 }
2654
2655 // Save Rbox in Rscratch to be used for the cas operation
2656 mov(Rbox, Rscratch);
2657
2658 // set Rmark to markOop | markOopDesc::unlocked_value
2659 or3(Rmark, markOopDesc::unlocked_value, Rmark);
2660
2661 // Initialize the box. (Must happen before we update the object mark!)
2662 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2663
2664 // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
2665 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2666 cas_ptr(mark_addr.base(), Rmark, Rscratch);
2667
2668 // if compare/exchange succeeded we found an unlocked object and we now have locked it
2669 // hence we are done
2670 cmp(Rmark, Rscratch);
2671 #ifdef _LP64
2672 sub(Rscratch, STACK_BIAS, Rscratch);
2673 #endif
2674 brx(Assembler::equal, false, Assembler::pt, done);
2675 delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
2676
2677 // we did not find an unlocked object so see if this is a recursive case
2678 // sub(Rscratch, SP, Rscratch);
2679 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2680 andcc(Rscratch, 0xfffff003, Rscratch);
2681 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2682 bind (done);
2683 return ;
2684 }
2685
2686 Label Egress ;
2687
2688 if (EmitSync & 256) {
2689 Label IsInflated ;
2690
2691 ld_ptr(mark_addr, Rmark); // fetch obj->mark
2692 // Triage: biased, stack-locked, neutral, inflated
2693 if (try_bias) {
2699 // Store mark into displaced mark field in the on-stack basic-lock "box"
2700 // Critically, this must happen before the CAS
2701 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
2702 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2703 andcc(Rmark, 2, G0);
2704 brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2705 delayed()->
2706
2707 // Try stack-lock acquisition.
2708 // Beware: the 1st instruction is in a delay slot
2709 mov(Rbox, Rscratch);
2710 or3(Rmark, markOopDesc::unlocked_value, Rmark);
2711 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2712 cas_ptr(mark_addr.base(), Rmark, Rscratch);
2713 cmp(Rmark, Rscratch);
2714 brx(Assembler::equal, false, Assembler::pt, done);
2715 delayed()->sub(Rscratch, SP, Rscratch);
2716
2717 // Stack-lock attempt failed - check for recursive stack-lock.
2718 // See the comments below about how we might remove this case.
2719 #ifdef _LP64
2720 sub(Rscratch, STACK_BIAS, Rscratch);
2721 #endif
2722 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2723 andcc(Rscratch, 0xfffff003, Rscratch);
2724 br(Assembler::always, false, Assembler::pt, done);
2725 delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2726
2727 bind(IsInflated);
2728 if (EmitSync & 64) {
2729 // If m->owner != null goto IsLocked
2730 // Pessimistic form: Test-and-CAS vs CAS
2731 // The optimistic form avoids RTS->RTO cache line upgrades.
2732 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
2733 andcc(Rscratch, Rscratch, G0);
2734 brx(Assembler::notZero, false, Assembler::pn, done);
2735 delayed()->nop();
2736 // m->owner == null : it's unlocked.
2737 }
2738
2739 // Try to CAS m->owner from null to Self
2740 // Invariant: if we acquire the lock then _recursions should be 0.
2741 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2783 }
2784 ba(done);
2785 delayed()->st_ptr(Rbox, mark_addr);
2786
2787 bind(Recursive);
2788 // Stack-lock attempt failed - check for recursive stack-lock.
2789 // Tests show that we can remove the recursive case with no impact
2790 // on refworkload 0.83. If we need to reduce the size of the code
2791 // emitted by compiler_lock_object() the recursive case is perfect
2792 // candidate.
2793 //
2794 // A more extreme idea is to always inflate on stack-lock recursion.
2795 // This lets us eliminate the recursive checks in compiler_lock_object
2796 // and compiler_unlock_object and the (box->dhw == 0) encoding.
2797 // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
2798 // and showed a performance *increase*. In the same experiment I eliminated
2799 // the fast-path stack-lock code from the interpreter and always passed
2800 // control to the "slow" operators in synchronizer.cpp.
2801
2802 // RScratch contains the fetched obj->mark value from the failed CAS.
2803 #ifdef _LP64
2804 sub(Rscratch, STACK_BIAS, Rscratch);
2805 #endif
2806 sub(Rscratch, SP, Rscratch);
2807 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2808 andcc(Rscratch, 0xfffff003, Rscratch);
2809 if (counters != NULL) {
2810 // Accounting needs the Rscratch register
2811 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2812 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2813 ba_short(done);
2814 } else {
2815 ba(done);
2816 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2817 }
2818
2819 bind (IsInflated);
2820
2821 // Try to CAS m->owner from null to Self
2822 // Invariant: if we acquire the lock then _recursions should be 0.
2823 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2824 mov(G2_thread, Rscratch);
2825 cas_ptr(Rmark, G0, Rscratch);
3703 delayed()->mov(pre_val->after_save(), O0);
3704 restore();
3705 }
3706
3707 bind(filtered);
3708 }
3709
// Lazily generated stub (see generate_dirty_card_log_enqueue below) that
// enqueues a card address; both remain 0/NULL until generation runs.
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
3712
3713 // This gets to assume that o0 contains the object address.
3714 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
3715 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
3716 CodeBuffer buf(bb);
3717 MacroAssembler masm(&buf);
3718 #define __ masm.
3719 address start = __ pc();
3720
3721 Label not_already_dirty, restart, refill, young_card;
3722
3723 #ifdef _LP64
3724 __ srlx(O0, CardTableModRefBS::card_shift, O0);
3725 #else
3726 __ srl(O0, CardTableModRefBS::card_shift, O0);
3727 #endif
3728 AddressLiteral addrlit(byte_map_base);
3729 __ set(addrlit, O1); // O1 := <card table base>
3730 __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3731
3732 __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
3733
3734 __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
3735 __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3736
3737 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
3738 __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
3739
3740 __ bind(young_card);
3741 // We didn't take the branch, so we're already dirty: return.
3742 // Use return-from-leaf
3743 __ retl();
3744 __ delayed()->nop();
3745
3746 // Not dirty.
3747 __ bind(not_already_dirty);
3809 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
3810 if (dirty_card_log_enqueue == 0) {
3811 generate_dirty_card_log_enqueue(byte_map_base);
3812 assert(dirty_card_log_enqueue != 0, "postcondition.");
3813 }
3814 }
3815
3816
3817 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
3818
3819 Label filtered;
3820 MacroAssembler* post_filter_masm = this;
3821
3822 if (new_val == G0) return;
3823
3824 G1SATBCardTableLoggingModRefBS* bs =
3825 barrier_set_cast<G1SATBCardTableLoggingModRefBS>(Universe::heap()->barrier_set());
3826
3827 if (G1RSBarrierRegionFilter) {
3828 xor3(store_addr, new_val, tmp);
3829 #ifdef _LP64
3830 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3831 #else
3832 srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3833 #endif
3834
3835 // XXX Should I predict this taken or not? Does it matter?
3836 cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
3837 }
3838
3839 // If the "store_addr" register is an "in" or "local" register, move it to
3840 // a scratch reg so we can pass it as an argument.
3841 bool use_scr = !(store_addr->is_global() || store_addr->is_out());
3842 // Pick a scratch register different from "tmp".
3843 Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
3844 // Make sure we use up the delay slot!
3845 if (use_scr) {
3846 post_filter_masm->mov(store_addr, scr);
3847 } else {
3848 post_filter_masm->nop();
3849 }
3850 generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
3851 save_frame(0);
3852 call(dirty_card_log_enqueue);
3853 if (use_scr) {
|
1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
279 mov(L5, G4);
280 restore(O0, 0, G2_thread);
281 }
282
// Runtime helper for MacroAssembler::verify_thread(): verifies that the
// value cached in G2_thread (passed as gthread_value) is really the current
// thread, aborting the VM otherwise, and returns the verified Thread*.
static Thread* verify_thread_subroutine(Thread* gthread_value) {
  Thread* correct_value = Thread::current();
  guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
  return correct_value;
}
288
// Debug-only (-XX:+VerifyThread) check that G2_thread still holds the
// current Thread*. Saves the live G registers into locals of a temporary
// frame, calls verify_thread_subroutine(G2_thread) out of line, then
// restores them; restore() moves the subroutine's return value (in O0)
// back into G2_thread.
void MacroAssembler::verify_thread() {
  if (VerifyThread) {
    // NOTE: this chops off the heads of the 64-bit O registers.
    // make sure G2_thread contains the right value
    save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
    mov(G1, L1);                // avoid clobbering G1
    // G2 saved below
    mov(G3, L3);                // avoid clobbering G3
    mov(G4, L4);                // avoid clobbering G4
    mov(G5_method, L5);         // avoid clobbering G5_method
    call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
    delayed()->mov(G2_thread, O0);   // delay slot: pass the cached thread as the argument

    mov(L1, G1);                // Restore G1
    // G2 restored below
    mov(L3, G3);                // restore G3
    mov(L4, G4);                // restore G4
    mov(L5, G5_method);         // restore G5_method
    restore(O0, 0, G2_thread);  // pop the frame; G2_thread := verified thread (from O0)
  }
}
310
311
// Cache G2_thread in 'thread_cache' (which must survive a register-window
// save, hence the local-or-in assert) before code that may clobber G2.
// Under -XX:+VerifyThread, G2_thread is then deliberately smashed with a
// recognizable marker so any stale use of it is caught early.
void MacroAssembler::save_thread(const Register thread_cache) {
  verify_thread();
  if (thread_cache->is_valid()) {
    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
    mov(G2_thread, thread_cache);
  }
  if (VerifyThread) {
    // smash G2_thread, as if the VM were about to anyway
    set(0x67676767, G2_thread);
  }
}
323
324
325 void MacroAssembler::restore_thread(const Register thread_cache) {
326 if (thread_cache->is_valid()) {
356
357 // Verify that flags was zeroed on return to Java
358 Label FlagsOk;
359 ld(flags, L0);
360 tst(L0);
361 br(Assembler::zero, false, Assembler::pt, FlagsOk);
362 delayed() -> restore();
363 STOP("flags not zeroed before leaving Java");
364 bind(FlagsOk);
365 #endif /* ASSERT */
366 //
367 // When returning from calling out from Java mode the frame anchor's last_Java_pc
368 // will always be set to NULL. It is set here so that if we are doing a call to
369 // native (not VM) that we capture the known pc and don't have to rely on the
370 // native call having a standard frame linkage where we can find the pc.
371
372 if (last_Java_pc->is_valid()) {
373 st_ptr(last_Java_pc, pc_addr);
374 }
375
376 #ifdef ASSERT
377 // Make sure that we have an odd stack
378 Label StackOk;
379 andcc(last_java_sp, 0x01, G0);
380 br(Assembler::notZero, false, Assembler::pt, StackOk);
381 delayed()->nop();
382 STOP("Stack Not Biased in set_last_Java_frame");
383 bind(StackOk);
384 #endif // ASSERT
385 assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
386 add( last_java_sp, STACK_BIAS, G4_scratch );
387 st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
388 }
389
390 void MacroAssembler::reset_last_Java_frame(void) {
391 assert_not_delayed();
392
393 Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
394 Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
395 Address flags (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
396
397 #ifdef ASSERT
398 // check that it WAS previously set
399 save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod to helper frame for -Xprof
400 ld_ptr(sp_addr, L0);
401 tst(L0);
402 breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
403 restore();
404 #endif // ASSERT
405
406 st_ptr(G0, sp_addr);
407 // Always return last_Java_pc to zero
623 breakpoint_trap(notZero, Assembler::ptr_cc);
624 // }
625 # endif
626
627 st_ptr(oop_result, vm_result_addr);
628 }
629
630
// Emit an inline-cache call to 'entry'. G5 (the inline-cache register) is
// loaded with the non-oop sentinel via a patchable sequence, and a
// virtual-call relocation (tagged with 'method_index') is recorded at the
// call so the site can be patched later. The delay-slot nop is optional so
// callers can fill the slot themselves when emit_delay is false.
void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index) {
  RelocationHolder rspec = virtual_call_Relocation::spec(pc(), method_index);
  patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
  relocate(rspec);
  call(entry, relocInfo::none);
  if (emit_delay) {
    delayed()->nop();
  }
}
640
// Mark the card table entry covering the address in 'obj' as dirty.
// 'obj' is clobbered: it is shifted right by card_shift to form the card
// index; 'tmp' receives the card table base, and a zero byte (G0) is
// stored at tmp + index.
void MacroAssembler::card_table_write(jbyte* byte_map_base,
                                      Register tmp, Register obj) {
  srlx(obj, CardTableModRefBS::card_shift, obj);
  assert(tmp != obj, "need separate temp reg");
  set((address) byte_map_base, tmp);
  stb(G0, tmp, obj);
}
648
649
650 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
651 address save_pc;
652 int shiftcnt;
653 # ifdef CHECK_DELAY
654 assert_not_delayed((char*) "cannot put two instructions in delay slot");
655 # endif
656 v9_dep();
657 save_pc = pc();
658
659 int msb32 = (int) (addrlit.value() >> 32);
660 int lsb32 = (int) (addrlit.value());
661
662 if (msb32 == 0 && lsb32 >= 0) {
663 Assembler::sethi(lsb32, d, addrlit.rspec());
664 }
665 else if (msb32 == -1) {
666 Assembler::sethi(~lsb32, d, addrlit.rspec());
667 xor3(d, ~low10(~0), d);
668 }
669 else {
670 Assembler::sethi(msb32, d, addrlit.rspec()); // msb 22-bits
671 if (msb32 & 0x3ff) // Any bits?
672 or3(d, msb32 & 0x3ff, d); // msb 32-bits are now in lsb 32
679 else
680 shiftcnt = 12;
681 if ((lsb32 >> 10) & 0x3ff) {
682 sllx(d, shiftcnt + 10, d); // Make room for last 10 bits
683 or3(d, (lsb32 >> 10) & 0x3ff, d); // Or in next 10
684 shiftcnt = 0;
685 }
686 else
687 shiftcnt = 10;
688 sllx(d, shiftcnt + 10, d); // Shift leaving disp field 0'd
689 }
690 else
691 sllx(d, 32, d);
692 }
693 // Pad out the instruction sequence so it can be patched later.
694 if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
695 addrlit.rtype() != relocInfo::runtime_call_type)) {
696 while (pc() < (save_pc + (7 * BytesPerInstWord)))
697 nop();
698 }
699 }
700
701
// Load the address literal into d. The emitted sequence is not forced to be
// patchable, so internal_sethi may choose a shorter form.
void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, false);  /* ForceRelocatable = false */
}
705
706
// Like sethi(), but forces a relocatable sequence (padded with nops to a
// fixed length by internal_sethi) so the value can be patched later.
void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, true);  /* ForceRelocatable = true */
}
710
711
712 int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
713 if (worst_case) return 7;
714 intptr_t iaddr = (intptr_t) a;
715 int msb32 = (int) (iaddr >> 32);
716 int lsb32 = (int) (iaddr);
717 int count;
718 if (msb32 == 0 && lsb32 >= 0)
719 count = 1;
720 else if (msb32 == -1)
721 count = 2;
722 else {
723 count = 2;
724 if (msb32 & 0x3ff)
725 count++;
726 if (lsb32 & 0xFFFFFC00 ) {
727 if ((lsb32 >> 20) & 0xfff) count += 2;
728 if ((lsb32 >> 10) & 0x3ff) count += 2;
729 }
730 }
731 return count;
732 }
733
// Upper bound on the instruction count of a set(): the worst-case sethi
// sequence plus one additional instruction for the low-order bits.
int MacroAssembler::worst_case_insts_for_set() {
  return insts_for_sethi(NULL, true) + 1;
}
737
738
739 // Keep in sync with MacroAssembler::insts_for_internal_set
740 void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
741 intptr_t value = addrlit.value();
742
743 if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
744 // can optimize
745 if (-4096 <= value && value <= 4095) {
746 or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
747 return;
748 }
749 if (inv_hi22(hi22(value)) == value) {
750 sethi(addrlit, d);
751 return;
1441 BREAKPOINT;
1442 ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
1443 }
1444 else {
1445 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1446 }
1447 assert(false, "DEBUG MESSAGE: %s", msg);
1448 }
1449
1450
// Rresult := max(Rparam_words - n_register_parameters, 0): the number of
// outgoing parameter words that do not fit in registers and spill to memory.
void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
  subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
  Label no_extras;
  br( negative, true, pt, no_extras ); // if neg, clear reg
  delayed()->set(0, Rresult); // annulled branch: the delay-slot set executes only if taken
  bind( no_extras );
}
1458
1459
// Rresult := total frame size in bytes for a frame with Rextra_words extra
// memory-parameter words: add the fixed sp offset, force the word count
// even with bclr(1, ...) (doubleword alignment), then scale to bytes.
void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
  add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
  bclr(1, Rresult);
  sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes
}
1465
1466
// Compute the frame size (see calc_frame_size) and push a new register
// window/frame of that size; the size is negated because SPARC save()
// adjusts SP by adding its (negative) operand.
void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
  calc_frame_size(Rextra_words, Rresult);
  neg(Rresult);
  save(SP, Rresult, SP);
}
1472
1473
1474 // ---------------------------------------------------------
1475 Assembler::RCondition cond2rcond(Assembler::Condition c) {
1476 switch (c) {
1477 /*case zero: */
1478 case Assembler::equal: return Assembler::rc_z;
1479 case Assembler::lessEqual: return Assembler::rc_lez;
1480 case Assembler::less: return Assembler::rc_lz;
1481 /*case notZero:*/
1482 case Assembler::notEqual: return Assembler::rc_nz;
1483 case Assembler::greater: return Assembler::rc_gz;
1484 case Assembler::greaterEqual: return Assembler::rc_gez;
1485 }
1486 ShouldNotReachHere();
1487 return Assembler::rc_z;
1488 }
1489
1490 // compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS
void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
  tst(s1);          // 32-bit compare of s1 against zero (sets icc only)
  br (c, a, p, L);  // conditional branch; 'a' is the annul bit, 'p' the prediction hint.
                    // Caller is responsible for the delay slot.
}
1495
1496 // Compares a pointer register with zero and branches on null.
1497 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();      // must not be emitted into an open delay slot
  bpr( rc_z, a, p, s1, L );  // branch on register contents: taken when s1 == 0
}
1502
void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();      // must not be emitted into an open delay slot
  bpr( rc_nz, a, p, s1, L ); // branch on register contents: taken when s1 != 0
}
1507
1508 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
1509
1510 // Compare integer (32 bit) values (icc only).
// Compare two registers (32-bit, icc) and branch to L.  When the label
// is close enough for a compare-and-branch (cbcond), emit that single
// instruction — it has no delay slot.  Otherwise fall back to the
// classic cmp / br / nop three-instruction sequence.
void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
                                      Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    // Single compare-and-branch instruction; no delay slot to fill.
    Assembler::cbcond(c, icc, s1, s2, L);
  } else {
    cmp(s1, s2);
    br(c, false, p, L);
    delayed()->nop();  // delay slot intentionally wasted ("short" form)
  }
}
1522
1523 // Compare integer (32 bit) values (icc only).
1524 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
1525 Predict p, Label& L) {
1801 // Rin and Rout are the same and should not be reversed.
1802
1803 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
1804 srl(Rin_high, Rcount, Rout_high ); // high half
1805 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
1806 if (Rcount == Rout_low) {
1807 srl(Rin_low, Rcount, Rout_low);
1808 }
1809 ba(done);
1810 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
1811
1812 // shift >= 32 bits, Ralt_count = Rcount-32
1813 bind(big_shift);
1814
1815 srl(Rin_high, Ralt_count, Rout_low);
1816 clr(Rout_high);
1817
1818 bind( done );
1819 }
1820
// Emit a 64-bit three-way compare: Rresult := -1, 0, or 1 as
// Ra <, ==, or > Rb (signed, via xcc).  The default -1 is written
// first and conditionally overwritten by the two movcc's.
void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
  cmp(Ra, Rb);                             // set xcc from the 64-bit compare
  mov(-1, Rresult);                        // assume Ra < Rb
  movcc(equal, false, xcc, 0, Rresult);    // equal   -> 0
  movcc(greater, false, xcc, 1, Rresult);  // greater -> 1
}
1827
1828
1829 void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
1830 switch (size_in_bytes) {
1831 case 8: ld_long(src, dst); break;
1832 case 4: ld( src, dst); break;
1833 case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break;
1834 case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break;
1835 default: ShouldNotReachHere();
1836 }
1837 }
1838
1839 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
1840 switch (size_in_bytes) {
1841 case 8: st_long(src, dst); break;
1842 case 4: st( src, dst); break;
1843 case 2: sth( src, dst); break;
1844 case 1: stb( src, dst); break;
1845 default: ShouldNotReachHere();
1846 }
2605 if (try_bias) {
2606 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2607 }
2608
2609 // Save Rbox in Rscratch to be used for the cas operation
2610 mov(Rbox, Rscratch);
2611
2612 // set Rmark to markOop | markOopDesc::unlocked_value
2613 or3(Rmark, markOopDesc::unlocked_value, Rmark);
2614
2615 // Initialize the box. (Must happen before we update the object mark!)
2616 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2617
2618 // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
2619 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2620 cas_ptr(mark_addr.base(), Rmark, Rscratch);
2621
2622 // if compare/exchange succeeded we found an unlocked object and we now have locked it
2623 // hence we are done
2624 cmp(Rmark, Rscratch);
2625 sub(Rscratch, STACK_BIAS, Rscratch);
2626 brx(Assembler::equal, false, Assembler::pt, done);
2627 delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
2628
2629 // we did not find an unlocked object so see if this is a recursive case
2630 // sub(Rscratch, SP, Rscratch);
2631 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2632 andcc(Rscratch, 0xfffff003, Rscratch);
2633 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2634 bind (done);
2635 return ;
2636 }
2637
2638 Label Egress ;
2639
2640 if (EmitSync & 256) {
2641 Label IsInflated ;
2642
2643 ld_ptr(mark_addr, Rmark); // fetch obj->mark
2644 // Triage: biased, stack-locked, neutral, inflated
2645 if (try_bias) {
2651 // Store mark into displaced mark field in the on-stack basic-lock "box"
2652 // Critically, this must happen before the CAS
2653 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
2654 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2655 andcc(Rmark, 2, G0);
2656 brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2657 delayed()->
2658
2659 // Try stack-lock acquisition.
2660 // Beware: the 1st instruction is in a delay slot
2661 mov(Rbox, Rscratch);
2662 or3(Rmark, markOopDesc::unlocked_value, Rmark);
2663 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2664 cas_ptr(mark_addr.base(), Rmark, Rscratch);
2665 cmp(Rmark, Rscratch);
2666 brx(Assembler::equal, false, Assembler::pt, done);
2667 delayed()->sub(Rscratch, SP, Rscratch);
2668
2669 // Stack-lock attempt failed - check for recursive stack-lock.
2670 // See the comments below about how we might remove this case.
2671 sub(Rscratch, STACK_BIAS, Rscratch);
2672 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2673 andcc(Rscratch, 0xfffff003, Rscratch);
2674 br(Assembler::always, false, Assembler::pt, done);
2675 delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2676
2677 bind(IsInflated);
2678 if (EmitSync & 64) {
2679 // If m->owner != null goto IsLocked
2680 // Pessimistic form: Test-and-CAS vs CAS
2681 // The optimistic form avoids RTS->RTO cache line upgrades.
2682 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
2683 andcc(Rscratch, Rscratch, G0);
2684 brx(Assembler::notZero, false, Assembler::pn, done);
2685 delayed()->nop();
2686 // m->owner == null : it's unlocked.
2687 }
2688
2689 // Try to CAS m->owner from null to Self
2690 // Invariant: if we acquire the lock then _recursions should be 0.
2691 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2733 }
2734 ba(done);
2735 delayed()->st_ptr(Rbox, mark_addr);
2736
2737 bind(Recursive);
2738 // Stack-lock attempt failed - check for recursive stack-lock.
2739 // Tests show that we can remove the recursive case with no impact
2740 // on refworkload 0.83. If we need to reduce the size of the code
2741 // emitted by compiler_lock_object() the recursive case is perfect
2742 // candidate.
2743 //
2744 // A more extreme idea is to always inflate on stack-lock recursion.
2745 // This lets us eliminate the recursive checks in compiler_lock_object
2746 // and compiler_unlock_object and the (box->dhw == 0) encoding.
2747 // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
2748 // and showed a performance *increase*. In the same experiment I eliminated
2749 // the fast-path stack-lock code from the interpreter and always passed
2750 // control to the "slow" operators in synchronizer.cpp.
2751
2752 // RScratch contains the fetched obj->mark value from the failed CAS.
2753 sub(Rscratch, STACK_BIAS, Rscratch);
2754 sub(Rscratch, SP, Rscratch);
2755 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2756 andcc(Rscratch, 0xfffff003, Rscratch);
2757 if (counters != NULL) {
2758 // Accounting needs the Rscratch register
2759 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2760 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2761 ba_short(done);
2762 } else {
2763 ba(done);
2764 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2765 }
2766
2767 bind (IsInflated);
2768
2769 // Try to CAS m->owner from null to Self
2770 // Invariant: if we acquire the lock then _recursions should be 0.
2771 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2772 mov(G2_thread, Rscratch);
2773 cas_ptr(Rmark, G0, Rscratch);
3651 delayed()->mov(pre_val->after_save(), O0);
3652 restore();
3653 }
3654
3655 bind(filtered);
3656 }
3657
// Entry point and end address of a lazily generated stub; both are 0
// until generate_dirty_card_log_enqueue() runs (triggered on first use
// by generate_dirty_card_log_enqueue_if_necessary()).
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
3660
3661 // This gets to assume that o0 contains the object address.
3662 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
3663 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
3664 CodeBuffer buf(bb);
3665 MacroAssembler masm(&buf);
3666 #define __ masm.
3667 address start = __ pc();
3668
3669 Label not_already_dirty, restart, refill, young_card;
3670
3671 __ srlx(O0, CardTableModRefBS::card_shift, O0);
3672 AddressLiteral addrlit(byte_map_base);
3673 __ set(addrlit, O1); // O1 := <card table base>
3674 __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3675
3676 __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
3677
3678 __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
3679 __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3680
3681 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
3682 __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
3683
3684 __ bind(young_card);
3685 // We didn't take the branch, so we're already dirty: return.
3686 // Use return-from-leaf
3687 __ retl();
3688 __ delayed()->nop();
3689
3690 // Not dirty.
3691 __ bind(not_already_dirty);
3753 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
3754 if (dirty_card_log_enqueue == 0) {
3755 generate_dirty_card_log_enqueue(byte_map_base);
3756 assert(dirty_card_log_enqueue != 0, "postcondition.");
3757 }
3758 }
3759
3760
3761 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
3762
3763 Label filtered;
3764 MacroAssembler* post_filter_masm = this;
3765
3766 if (new_val == G0) return;
3767
3768 G1SATBCardTableLoggingModRefBS* bs =
3769 barrier_set_cast<G1SATBCardTableLoggingModRefBS>(Universe::heap()->barrier_set());
3770
3771 if (G1RSBarrierRegionFilter) {
3772 xor3(store_addr, new_val, tmp);
3773 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3774
3775 // XXX Should I predict this taken or not? Does it matter?
3776 cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
3777 }
3778
3779 // If the "store_addr" register is an "in" or "local" register, move it to
3780 // a scratch reg so we can pass it as an argument.
3781 bool use_scr = !(store_addr->is_global() || store_addr->is_out());
3782 // Pick a scratch register different from "tmp".
3783 Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
3784 // Make sure we use up the delay slot!
3785 if (use_scr) {
3786 post_filter_masm->mov(store_addr, scr);
3787 } else {
3788 post_filter_masm->nop();
3789 }
3790 generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
3791 save_frame(0);
3792 call(dirty_card_log_enqueue);
3793 if (use_scr) {
|