src/cpu/sparc/vm/macroAssembler_sparc.cpp

   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 279   mov(L5, G4);
 280   restore(O0, 0, G2_thread);
 281 }
 282 
 283 static Thread* verify_thread_subroutine(Thread* gthread_value) {
 284   Thread* correct_value = Thread::current();
 285   guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
 286   return correct_value;
 287 }
 288 
 289 void MacroAssembler::verify_thread() {
 290   if (VerifyThread) {
 291     // NOTE: this chops off the heads of the 64-bit O registers.
 292     // make sure G2_thread contains the right value
 293     save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
 294     mov(G1, L1);                // avoid clobbering G1
 295     // G2 saved below
 296     mov(G3, L3);                // avoid clobbering G3
 297     mov(G4, L4);                // avoid clobbering G4
 298     mov(G5_method, L5);         // avoid clobbering G5_method
 299 #if defined(COMPILER2) && !defined(_LP64)
 300     // Save & restore possible 64-bit Long arguments in G-regs
 301     srlx(G1,32,L0);
 302     srlx(G4,32,L6);
 303 #endif
 304     call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
 305     delayed()->mov(G2_thread, O0);
 306 
 307     mov(L1, G1);                // Restore G1
 308     // G2 restored below
 309     mov(L3, G3);                // restore G3
 310     mov(L4, G4);                // restore G4
 311     mov(L5, G5_method);         // restore G5_method
 312 #if defined(COMPILER2) && !defined(_LP64)
 313     // Save & restore possible 64-bit Long arguments in G-regs
 314     sllx(L0,32,G2);             // Move old high G1 bits high in G2
 315     srl(G1, 0,G1);              // Clear current high G1 bits
 316     or3 (G1,G2,G1);             // Recover 64-bit G1
 317     sllx(L6,32,G2);             // Move old high G4 bits high in G2
 318     srl(G4, 0,G4);              // Clear current high G4 bits
 319     or3 (G4,G2,G4);             // Recover 64-bit G4
 320 #endif
 321     restore(O0, 0, G2_thread);
 322   }
 323 }
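         // Shape of the check above, as a sketch: the globals G1/G3/G4/G5 are
         // parked in the locals of a fresh register window, G2_thread is passed
         // to verify_thread_subroutine() as O0, and the final restore() both
         // pops the window and moves the verified thread pointer back into
         // G2_thread.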
 324 
 325 
 326 void MacroAssembler::save_thread(const Register thread_cache) {
 327   verify_thread();
 328   if (thread_cache->is_valid()) {
 329     assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
 330     mov(G2_thread, thread_cache);
 331   }
 332   if (VerifyThread) {
 333     // smash G2_thread, as if the VM were about to anyway
 334     set(0x67676767, G2_thread);
 335   }
 336 }
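         // 0x67676767 is a deliberately recognizable poison value: with
         // VerifyThread, any code that keeps using G2_thread after caching it
         // trips over the garbage pattern instead of silently working.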
 337 
 338 
 339 void MacroAssembler::restore_thread(const Register thread_cache) {
 340   if (thread_cache->is_valid()) {


 370 
 371   // Verify that flags was zeroed on return to Java
 372   Label FlagsOk;
 373   ld(flags, L0);
 374   tst(L0);
 375   br(Assembler::zero, false, Assembler::pt, FlagsOk);
 376   delayed() -> restore();
 377   STOP("flags not zeroed before leaving Java");
 378   bind(FlagsOk);
 379 #endif /* ASSERT */
 380   //
 381   // When returning from calling out from Java mode the frame anchor's last_Java_pc
 382   // will always be set to NULL. It is set here so that if we are doing a call to
 383   // native (not VM) we capture the known pc and don't have to rely on the
 384   // native call having a standard frame linkage where we can find the pc.
 385 
 386   if (last_Java_pc->is_valid()) {
 387     st_ptr(last_Java_pc, pc_addr);
 388   }
 389 
 390 #ifdef _LP64
 391 #ifdef ASSERT
 392   // Make sure that we have an odd stack
 393   Label StackOk;
 394   andcc(last_java_sp, 0x01, G0);
 395   br(Assembler::notZero, false, Assembler::pt, StackOk);
 396   delayed()->nop();
 397   STOP("Stack Not Biased in set_last_Java_frame");
 398   bind(StackOk);
 399 #endif // ASSERT
 400   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
 401   add( last_java_sp, STACK_BIAS, G4_scratch );
 402   st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
 403 #else
 404   st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset());
 405 #endif // _LP64
 406 }
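         // On SPARC V9 the register SP is kept biased by STACK_BIAS, which is
         // why the assert above insists on an odd stack pointer.  In rough C
         // terms (a sketch of the st_ptr above):
         //   *(intptr_t*)((char*)thread + last_Java_sp_offset()) = sp + STACK_BIAS;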
 407 
 408 void MacroAssembler::reset_last_Java_frame(void) {
 409   assert_not_delayed();
 410 
 411   Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
 412   Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
 413   Address flags  (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
 414 
 415 #ifdef ASSERT
 416   // check that it WAS previously set
 417     save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod to helper frame for -Xprof
 418     ld_ptr(sp_addr, L0);
 419     tst(L0);
 420     breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
 421     restore();
 422 #endif // ASSERT
 423 
 424   st_ptr(G0, sp_addr);
 425   // Always return last_Java_pc to zero


 641     breakpoint_trap(notZero, Assembler::ptr_cc);
 642     // }
 643 # endif
 644 
 645   st_ptr(oop_result, vm_result_addr);
 646 }
 647 
 648 
 649 void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index) {
 650   RelocationHolder rspec = virtual_call_Relocation::spec(pc(), method_index);
 651   patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
 652   relocate(rspec);
 653   call(entry, relocInfo::none);
 654   if (emit_delay) {
 655     delayed()->nop();
 656   }
 657 }
 658 
 659 void MacroAssembler::card_table_write(jbyte* byte_map_base,
 660                                       Register tmp, Register obj) {
 661 #ifdef _LP64
 662   srlx(obj, CardTableModRefBS::card_shift, obj);
 663 #else
 664   srl(obj, CardTableModRefBS::card_shift, obj);
 665 #endif
 666   assert(tmp != obj, "need separate temp reg");
 667   set((address) byte_map_base, tmp);
 668   stb(G0, tmp, obj);
 669 }
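         // In rough C terms the three instructions above implement (a sketch,
         // relying on dirty_card_val() == 0, i.e. G0):
         //   byte_map_base[(uintptr_t)obj >> card_shift] = 0;
         // Note that obj is clobbered with the card index, which is why a
         // separate temp register is required for the table base.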
 670 
 671 
 672 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
 673   address save_pc;
 674   int shiftcnt;
 675 #ifdef _LP64
 676 # ifdef CHECK_DELAY
 677   assert_not_delayed((char*) "cannot put two instructions in delay slot");
 678 # endif
 679   v9_dep();
 680   save_pc = pc();
 681 
 682   int msb32 = (int) (addrlit.value() >> 32);
 683   int lsb32 = (int) (addrlit.value());
 684 
 685   if (msb32 == 0 && lsb32 >= 0) {
 686     Assembler::sethi(lsb32, d, addrlit.rspec());
 687   }
 688   else if (msb32 == -1) {
 689     Assembler::sethi(~lsb32, d, addrlit.rspec());
 690     xor3(d, ~low10(~0), d);
 691   }
 692   else {
 693     Assembler::sethi(msb32, d, addrlit.rspec());  // msb 22-bits
 694     if (msb32 & 0x3ff)                            // Any bits?
 695       or3(d, msb32 & 0x3ff, d);                   // msb 32-bits are now in lsb 32


 702       else
 703         shiftcnt = 12;
 704       if ((lsb32 >> 10) & 0x3ff) {
 705         sllx(d, shiftcnt + 10, d);                // Make room for last 10 bits
 706         or3(d, (lsb32 >> 10) & 0x3ff, d);         // Or in next 10
 707         shiftcnt = 0;
 708       }
 709       else
 710         shiftcnt = 10;
 711       sllx(d, shiftcnt + 10, d);                  // Shift leaving disp field 0'd
 712     }
 713     else
 714       sllx(d, 32, d);
 715   }
 716   // Pad out the instruction sequence so it can be patched later.
 717   if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
 718                            addrlit.rtype() != relocInfo::runtime_call_type)) {
 719     while (pc() < (save_pc + (7 * BytesPerInstWord)))
 720       nop();
 721   }
 722 #else
 723   Assembler::sethi(addrlit.value(), d, addrlit.rspec());
 724 #endif
 725 }
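         // Worst case for the 64-bit path is the 7-instruction sequence counted
         // by insts_for_sethi() below: sethi/or3 build msb32 in the low word,
         // then sllx/or3 pairs shift it up while merging lsb32 in 12- and 10-bit
         // chunks, with a final sllx leaving the low 10 bits (the disp field)
         // zero for the or3 that set() appends.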
 726 
 727 
 728 void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
 729   internal_sethi(addrlit, d, false);
 730 }
 731 
 732 
 733 void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
 734   internal_sethi(addrlit, d, true);
 735 }
 736 
 737 
 738 int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
 739 #ifdef _LP64
 740   if (worst_case)  return 7;
 741   intptr_t iaddr = (intptr_t) a;
 742   int msb32 = (int) (iaddr >> 32);
 743   int lsb32 = (int) (iaddr);
 744   int count;
 745   if (msb32 == 0 && lsb32 >= 0)
 746     count = 1;
 747   else if (msb32 == -1)
 748     count = 2;
 749   else {
 750     count = 2;
 751     if (msb32 & 0x3ff)
 752       count++;
 753     if (lsb32 & 0xFFFFFC00 ) {
 754       if ((lsb32 >> 20) & 0xfff)  count += 2;
 755       if ((lsb32 >> 10) & 0x3ff)  count += 2;
 756     }
 757   }
 758   return count;
 759 #else
 760   return 1;
 761 #endif
 762 }
 763 
 764 int MacroAssembler::worst_case_insts_for_set() {
 765   return insts_for_sethi(NULL, true) + 1;
 766 }
 767 
 768 
 769 // Keep in sync with MacroAssembler::insts_for_internal_set
 770 void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
 771   intptr_t value = addrlit.value();
 772 
 773   if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
 774     // can optimize
 775     if (-4096 <= value && value <= 4095) {
 776       or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
 777       return;
 778     }
 779     if (inv_hi22(hi22(value)) == value) {
 780       sethi(addrlit, d);
 781       return;


1471     BREAKPOINT;
1472       ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
1473   }
1474   else {
1475      ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1476   }
1477   assert(false, "DEBUG MESSAGE: %s", msg);
1478 }
1479 
1480 
1481 void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
1482   subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
1483   Label no_extras;
1484   br( negative, true, pt, no_extras ); // if neg, clear reg
 1485   delayed()->set(0, Rresult);          // annulled, so only if taken
1486   bind( no_extras );
1487 }
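         // Net effect, as a sketch:
         //   Rresult = MAX(Rparam_words - Argument::n_register_parameters, 0);
         // The annulled branch means the set(0) in the delay slot executes only
         // when the subtract went negative.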
1488 
1489 
1490 void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
1491 #ifdef _LP64
1492   add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
1493 #else
1494   add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult);
1495 #endif
1496   bclr(1, Rresult);
1497   sll(Rresult, LogBytesPerWord, Rresult);  // Rresult has total frame bytes
1498 }
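         // bclr(1, Rresult) clears the low bit so the word count is even, and
         // the sll scales words to bytes, keeping the computed frame size
         // doubleword-aligned for the save() in calc_frame_size_and_save().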
1499 
1500 
1501 void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
1502   calc_frame_size(Rextra_words, Rresult);
1503   neg(Rresult);
1504   save(SP, Rresult, SP);
1505 }
1506 
1507 
1508 // ---------------------------------------------------------
1509 Assembler::RCondition cond2rcond(Assembler::Condition c) {
1510   switch (c) {
1511     /*case zero: */
1512     case Assembler::equal:        return Assembler::rc_z;
1513     case Assembler::lessEqual:    return Assembler::rc_lez;
1514     case Assembler::less:         return Assembler::rc_lz;
1515     /*case notZero:*/
1516     case Assembler::notEqual:     return Assembler::rc_nz;
1517     case Assembler::greater:      return Assembler::rc_gz;
1518     case Assembler::greaterEqual: return Assembler::rc_gez;
1519   }
1520   ShouldNotReachHere();
1521   return Assembler::rc_z;
1522 }
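         // These register conditions feed bpr-style branches (branch on the
         // value of a register), which test a 64-bit register directly instead
         // of going through the icc/xcc condition codes; see br_null and
         // br_notnull below.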
1523 
1524 // compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
1525 void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
1526   tst(s1);
1527   br (c, a, p, L);
1528 }
1529 
1530 // Compares a pointer register with zero and branches on null.
1531 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
1532 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
1533   assert_not_delayed();
1534 #ifdef _LP64
1535   bpr( rc_z, a, p, s1, L );
1536 #else
1537   tst(s1);
1538   br ( zero, a, p, L );
1539 #endif
1540 }
1541 
1542 void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
1543   assert_not_delayed();
1544 #ifdef _LP64
1545   bpr( rc_nz, a, p, s1, L );
1546 #else
1547   tst(s1);
1548   br ( notZero, a, p, L );
1549 #endif
1550 }
1551 
1552 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
1553 
1554 // Compare integer (32 bit) values (icc only).
1555 void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
1556                                       Predict p, Label& L) {
1557   assert_not_delayed();
1558   if (use_cbcond(L)) {
1559     Assembler::cbcond(c, icc, s1, s2, L);
1560   } else {
1561     cmp(s1, s2);
1562     br(c, false, p, L);
1563     delayed()->nop();
1564   }
1565 }
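         // cbcond fuses compare and branch into a single instruction with no
         // delay slot; use_cbcond(L) is expected to check both that the
         // hardware/flags allow it and that L is within cbcond's short
         // displacement range, falling back to the classic three-instruction
         // cmp/br/nop form otherwise.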
1566 
1567 // Compare integer (32 bit) values (icc only).
1568 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
1569                                       Predict p, Label& L) {


1845   // Rin and Rout are the same and should not be reversed.
1846 
1847   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
1848   srl(Rin_high,     Rcount, Rout_high ); // high half
1849   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
1850   if (Rcount == Rout_low) {
1851     srl(Rin_low, Rcount, Rout_low);
1852   }
1853   ba(done);
1854   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
1855 
1856   // shift >= 32 bits, Ralt_count = Rcount-32
1857   bind(big_shift);
1858 
1859   srl(Rin_high, Ralt_count, Rout_low);
1860   clr(Rout_high);
1861 
1862   bind( done );
1863 }
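         // For shift counts in [0,31] the sequence above computes, in C terms:
         //   out_high = in_high >> count;
         //   out_low  = (in_low >> count) | (in_high << 1 << (31 - count));
         // i.e. the transfer bits are shifted by (31 - count) and then by one
         // more, presumably because a single shift by (32 - count) would
         // degenerate to a shift by 32 when count == 0.  Counts of 32 and up
         // take the big_shift path, where the result is in_high >> (count - 32).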
1864 
1865 #ifdef _LP64
1866 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
1867   cmp(Ra, Rb);
1868   mov(-1, Rresult);
1869   movcc(equal,   false, xcc,  0, Rresult);
1870   movcc(greater, false, xcc,  1, Rresult);
1871 }
1872 #endif
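         // Branchless three-way compare: Rresult starts at -1 and the
         // conditional moves rewrite it to 0 on equal and 1 on greater, so the
         // result follows the usual signed comparator contract (-1, 0, 1).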
1873 
1874 
1875 void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
1876   switch (size_in_bytes) {
1877   case  8:  ld_long(src, dst); break;
1878   case  4:  ld(     src, dst); break;
1879   case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
1880   case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
1881   default:  ShouldNotReachHere();
1882   }
1883 }
1884 
1885 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
1886   switch (size_in_bytes) {
1887   case  8:  st_long(src, dst); break;
1888   case  4:  st(     src, dst); break;
1889   case  2:  sth(    src, dst); break;
1890   case  1:  stb(    src, dst); break;
1891   default:  ShouldNotReachHere();
1892   }


2651      if (try_bias) {
2652         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2653      }
2654 
2655      // Save Rbox in Rscratch to be used for the cas operation
2656      mov(Rbox, Rscratch);
2657 
2658      // set Rmark to markOop | markOopDesc::unlocked_value
2659      or3(Rmark, markOopDesc::unlocked_value, Rmark);
2660 
2661      // Initialize the box.  (Must happen before we update the object mark!)
2662      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2663 
2664      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
2665      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2666      cas_ptr(mark_addr.base(), Rmark, Rscratch);
2667 
2668      // if compare/exchange succeeded we found an unlocked object and we now have locked it
2669      // hence we are done
2670      cmp(Rmark, Rscratch);
2671 #ifdef _LP64
2672      sub(Rscratch, STACK_BIAS, Rscratch);
2673 #endif
2674      brx(Assembler::equal, false, Assembler::pt, done);
2675      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
2676 
2677      // we did not find an unlocked object so see if this is a recursive case
2678      // sub(Rscratch, SP, Rscratch);
2679      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2680      andcc(Rscratch, 0xfffff003, Rscratch);
2681      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2682      bind (done);
2683      return ;
2684    }
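          // Sketch of the stack-lock fast path above, in C-like pseudocode:
          //   box->dhw = mark | unlocked_value;              // displaced header
          //   old = CAS(&obj->mark, mark | unlocked_value, box);
          //   if (old == (mark | unlocked_value)) goto done; // we own the lock
          //   // Otherwise check for recursive stack-locking: old points into
          //   // our own stack page iff ((old - STACK_BIAS) - SP) & 0xfffff003
          //   // is zero, and that masked value (zero exactly in the recursive
          //   // case) is what gets stored back as the displaced header.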
2685 
2686    Label Egress ;
2687 
2688    if (EmitSync & 256) {
2689       Label IsInflated ;
2690 
2691       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
2692       // Triage: biased, stack-locked, neutral, inflated
2693       if (try_bias) {


2699       // Store mark into displaced mark field in the on-stack basic-lock "box"
2700       // Critically, this must happen before the CAS
2701       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
2702       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2703       andcc(Rmark, 2, G0);
2704       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2705       delayed()->
2706 
2707       // Try stack-lock acquisition.
2708       // Beware: the 1st instruction is in a delay slot
2709       mov(Rbox,  Rscratch);
2710       or3(Rmark, markOopDesc::unlocked_value, Rmark);
2711       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2712       cas_ptr(mark_addr.base(), Rmark, Rscratch);
2713       cmp(Rmark, Rscratch);
2714       brx(Assembler::equal, false, Assembler::pt, done);
2715       delayed()->sub(Rscratch, SP, Rscratch);
2716 
2717       // Stack-lock attempt failed - check for recursive stack-lock.
2718       // See the comments below about how we might remove this case.
2719 #ifdef _LP64
2720       sub(Rscratch, STACK_BIAS, Rscratch);
2721 #endif
2722       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2723       andcc(Rscratch, 0xfffff003, Rscratch);
2724       br(Assembler::always, false, Assembler::pt, done);
2725       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2726 
2727       bind(IsInflated);
2728       if (EmitSync & 64) {
2729          // If m->owner != null goto IsLocked
2730          // Pessimistic form: Test-and-CAS vs CAS
2731          // The optimistic form avoids RTS->RTO cache line upgrades.
2732          ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
2733          andcc(Rscratch, Rscratch, G0);
2734          brx(Assembler::notZero, false, Assembler::pn, done);
2735          delayed()->nop();
2736          // m->owner == null : it's unlocked.
2737       }
2738 
2739       // Try to CAS m->owner from null to Self
2740       // Invariant: if we acquire the lock then _recursions should be 0.
2741       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);


2783       }
2784       ba(done);
2785       delayed()->st_ptr(Rbox, mark_addr);
2786 
2787       bind(Recursive);
2788       // Stack-lock attempt failed - check for recursive stack-lock.
2789       // Tests show that we can remove the recursive case with no impact
2790       // on refworkload 0.83.  If we need to reduce the size of the code
 2791       // emitted by compiler_lock_object(), the recursive case is a perfect
 2792       // candidate.
2793       //
2794       // A more extreme idea is to always inflate on stack-lock recursion.
2795       // This lets us eliminate the recursive checks in compiler_lock_object
2796       // and compiler_unlock_object and the (box->dhw == 0) encoding.
 2797       // A brief experiment - requiring changes to synchronizer.cpp and the
 2798       // interpreter - showed a performance *increase*.  In the same experiment I eliminated
2799       // the fast-path stack-lock code from the interpreter and always passed
2800       // control to the "slow" operators in synchronizer.cpp.
2801 
 2802       // Rscratch contains the fetched obj->mark value from the failed CAS.
2803 #ifdef _LP64
2804       sub(Rscratch, STACK_BIAS, Rscratch);
2805 #endif
2806       sub(Rscratch, SP, Rscratch);
2807       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2808       andcc(Rscratch, 0xfffff003, Rscratch);
2809       if (counters != NULL) {
2810         // Accounting needs the Rscratch register
2811         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2812         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2813         ba_short(done);
2814       } else {
2815         ba(done);
2816         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2817       }
2818 
2819       bind   (IsInflated);
2820 
2821       // Try to CAS m->owner from null to Self
2822       // Invariant: if we acquire the lock then _recursions should be 0.
2823       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2824       mov(G2_thread, Rscratch);
2825       cas_ptr(Rmark, G0, Rscratch);


3703     delayed()->mov(pre_val->after_save(), O0);
3704     restore();
3705   }
3706 
3707   bind(filtered);
3708 }
3709 
3710 static address dirty_card_log_enqueue = 0;
3711 static u_char* dirty_card_log_enqueue_end = 0;
3712 
 3713 // This gets to assume that O0 contains the object address.
3714 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
3715   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
3716   CodeBuffer buf(bb);
3717   MacroAssembler masm(&buf);
3718 #define __ masm.
3719   address start = __ pc();
3720 
3721   Label not_already_dirty, restart, refill, young_card;
3722 
3723 #ifdef _LP64
3724   __ srlx(O0, CardTableModRefBS::card_shift, O0);
3725 #else
3726   __ srl(O0, CardTableModRefBS::card_shift, O0);
3727 #endif
3728   AddressLiteral addrlit(byte_map_base);
3729   __ set(addrlit, O1); // O1 := <card table base>
3730   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3731 
3732   __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
3733 
3734   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
3735   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3736 
3737   assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
3738   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
3739 
3740   __ bind(young_card);
3741   // We didn't take the branch, so we're already dirty: return.
3742   // Use return-from-leaf
3743   __ retl();
3744   __ delayed()->nop();
3745 
3746   // Not dirty.
3747   __ bind(not_already_dirty);


3809 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
3810   if (dirty_card_log_enqueue == 0) {
3811     generate_dirty_card_log_enqueue(byte_map_base);
3812     assert(dirty_card_log_enqueue != 0, "postcondition.");
3813   }
3814 }
3815 
3816 
3817 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
3818 
3819   Label filtered;
3820   MacroAssembler* post_filter_masm = this;
3821 
3822   if (new_val == G0) return;
3823 
3824   G1SATBCardTableLoggingModRefBS* bs =
3825     barrier_set_cast<G1SATBCardTableLoggingModRefBS>(Universe::heap()->barrier_set());
3826 
3827   if (G1RSBarrierRegionFilter) {
3828     xor3(store_addr, new_val, tmp);
3829 #ifdef _LP64
3830     srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3831 #else
3832     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3833 #endif
3834 
3835     // XXX Should I predict this taken or not?  Does it matter?
3836     cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
3837   }
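         // The xor/srlx pair above is a same-region filter: the shifted xor is
         // zero iff store_addr and new_val fall into the same heap region, and
         // a store that creates no cross-region reference needs no
         // remembered-set update, so the barrier can be skipped.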
3838 
3839   // If the "store_addr" register is an "in" or "local" register, move it to
3840   // a scratch reg so we can pass it as an argument.
3841   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
3842   // Pick a scratch register different from "tmp".
3843   Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
3844   // Make sure we use up the delay slot!
3845   if (use_scr) {
3846     post_filter_masm->mov(store_addr, scr);
3847   } else {
3848     post_filter_masm->nop();
3849   }
3850   generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
3851   save_frame(0);
3852   call(dirty_card_log_enqueue);
3853   if (use_scr) {


   1 /*
   2  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 279   mov(L5, G4);
 280   restore(O0, 0, G2_thread);
 281 }
 282 
 283 static Thread* verify_thread_subroutine(Thread* gthread_value) {
 284   Thread* correct_value = Thread::current();
 285   guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
 286   return correct_value;
 287 }
 288 
 289 void MacroAssembler::verify_thread() {
 290   if (VerifyThread) {
 291     // NOTE: this chops off the heads of the 64-bit O registers.
 292     // make sure G2_thread contains the right value
 293     save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
 294     mov(G1, L1);                // avoid clobbering G1
 295     // G2 saved below
 296     mov(G3, L3);                // avoid clobbering G3
 297     mov(G4, L4);                // avoid clobbering G4
 298     mov(G5_method, L5);         // avoid clobbering G5_method





 299     call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
 300     delayed()->mov(G2_thread, O0);
 301 
 302     mov(L1, G1);                // Restore G1
 303     // G2 restored below
 304     mov(L3, G3);                // restore G3
 305     mov(L4, G4);                // restore G4
 306     mov(L5, G5_method);         // restore G5_method









 307     restore(O0, 0, G2_thread);
 308   }
 309 }
 310 
 311 
 312 void MacroAssembler::save_thread(const Register thread_cache) {
 313   verify_thread();
 314   if (thread_cache->is_valid()) {
 315     assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
 316     mov(G2_thread, thread_cache);
 317   }
 318   if (VerifyThread) {
 319     // smash G2_thread, as if the VM were about to anyway
 320     set(0x67676767, G2_thread);
 321   }
 322 }
 323 
 324 
 325 void MacroAssembler::restore_thread(const Register thread_cache) {
 326   if (thread_cache->is_valid()) {


 356 
 357   // Verify that flags was zeroed on return to Java
 358   Label FlagsOk;
 359   ld(flags, L0);
 360   tst(L0);
 361   br(Assembler::zero, false, Assembler::pt, FlagsOk);
 362   delayed() -> restore();
 363   STOP("flags not zeroed before leaving Java");
 364   bind(FlagsOk);
 365 #endif /* ASSERT */
 366   //
 367   // When returning from calling out from Java mode the frame anchor's last_Java_pc
 368   // will always be set to NULL. It is set here so that if we are doing a call to
 369   // native (not VM) we capture the known pc and don't have to rely on the
 370   // native call having a standard frame linkage where we can find the pc.
 371 
 372   if (last_Java_pc->is_valid()) {
 373     st_ptr(last_Java_pc, pc_addr);
 374   }
 375 

 376 #ifdef ASSERT
 377   // Make sure that we have an odd stack
 378   Label StackOk;
 379   andcc(last_java_sp, 0x01, G0);
 380   br(Assembler::notZero, false, Assembler::pt, StackOk);
 381   delayed()->nop();
 382   STOP("Stack Not Biased in set_last_Java_frame");
 383   bind(StackOk);
 384 #endif // ASSERT
 385   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
 386   add( last_java_sp, STACK_BIAS, G4_scratch );
 387   st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());



 388 }
 389 
 390 void MacroAssembler::reset_last_Java_frame(void) {
 391   assert_not_delayed();
 392 
 393   Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
 394   Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
 395   Address flags  (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
 396 
 397 #ifdef ASSERT
 398   // check that it WAS previously set
 399     save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod to helper frame for -Xprof
 400     ld_ptr(sp_addr, L0);
 401     tst(L0);
 402     breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
 403     restore();
 404 #endif // ASSERT
 405 
 406   st_ptr(G0, sp_addr);
 407   // Always return last_Java_pc to zero


 623     breakpoint_trap(notZero, Assembler::ptr_cc);
 624     // }
 625 # endif
 626 
 627   st_ptr(oop_result, vm_result_addr);
 628 }
 629 
 630 
 631 void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index) {
 632   RelocationHolder rspec = virtual_call_Relocation::spec(pc(), method_index);
 633   patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
 634   relocate(rspec);
 635   call(entry, relocInfo::none);
 636   if (emit_delay) {
 637     delayed()->nop();
 638   }
 639 }
 640 
 641 void MacroAssembler::card_table_write(jbyte* byte_map_base,
 642                                       Register tmp, Register obj) {

 643   srlx(obj, CardTableModRefBS::card_shift, obj);



 644   assert(tmp != obj, "need separate temp reg");
 645   set((address) byte_map_base, tmp);
 646   stb(G0, tmp, obj);
 647 }
 648 
 649 
 650 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
 651   address save_pc;
 652   int shiftcnt;

 653 # ifdef CHECK_DELAY
 654   assert_not_delayed((char*) "cannot put two instructions in delay slot");
 655 # endif
 656   v9_dep();
 657   save_pc = pc();
 658 
 659   int msb32 = (int) (addrlit.value() >> 32);
 660   int lsb32 = (int) (addrlit.value());
 661 
 662   if (msb32 == 0 && lsb32 >= 0) {
 663     Assembler::sethi(lsb32, d, addrlit.rspec());
 664   }
 665   else if (msb32 == -1) {
 666     Assembler::sethi(~lsb32, d, addrlit.rspec());
 667     xor3(d, ~low10(~0), d);
 668   }
 669   else {
 670     Assembler::sethi(msb32, d, addrlit.rspec());  // msb 22-bits
 671     if (msb32 & 0x3ff)                            // Any bits?
 672       or3(d, msb32 & 0x3ff, d);                   // msb 32-bits are now in lsb 32


 679       else
 680         shiftcnt = 12;
 681       if ((lsb32 >> 10) & 0x3ff) {
 682         sllx(d, shiftcnt + 10, d);                // Make room for last 10 bits
 683         or3(d, (lsb32 >> 10) & 0x3ff, d);         // Or in next 10
 684         shiftcnt = 0;
 685       }
 686       else
 687         shiftcnt = 10;
 688       sllx(d, shiftcnt + 10, d);                  // Shift leaving disp field 0'd
 689     }
 690     else
 691       sllx(d, 32, d);
 692   }
 693   // Pad out the instruction sequence so it can be patched later.
 694   if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
 695                            addrlit.rtype() != relocInfo::runtime_call_type)) {
 696     while (pc() < (save_pc + (7 * BytesPerInstWord)))
 697       nop();
 698   }



 699 }
 700 
 701 
 702 void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
 703   internal_sethi(addrlit, d, false);
 704 }
 705 
 706 
 707 void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
 708   internal_sethi(addrlit, d, true);
 709 }
 710 
 711 
 712 int MacroAssembler::insts_for_sethi(address a, bool worst_case) {

 713   if (worst_case)  return 7;
 714   intptr_t iaddr = (intptr_t) a;
 715   int msb32 = (int) (iaddr >> 32);
 716   int lsb32 = (int) (iaddr);
 717   int count;
 718   if (msb32 == 0 && lsb32 >= 0)
 719     count = 1;
 720   else if (msb32 == -1)
 721     count = 2;
 722   else {
 723     count = 2;
 724     if (msb32 & 0x3ff)
 725       count++;
 726     if (lsb32 & 0xFFFFFC00 ) {
 727       if ((lsb32 >> 20) & 0xfff)  count += 2;
 728       if ((lsb32 >> 10) & 0x3ff)  count += 2;
 729     }
 730   }
 731   return count;



 732 }
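         // Worked example: for a = 0x0004000000000000 (msb32 = 0x40000,
         // lsb32 = 0) the count is 2, matching the emitted sequence
         // sethi(msb32) followed by sllx(d, 32, d); the worst case of 7 only
         // arises when both halves carry bits in every chunk.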
 733 
 734 int MacroAssembler::worst_case_insts_for_set() {
 735   return insts_for_sethi(NULL, true) + 1;
 736 }
 737 
 738 
 739 // Keep in sync with MacroAssembler::insts_for_internal_set
 740 void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
 741   intptr_t value = addrlit.value();
 742 
 743   if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
 744     // can optimize
 745     if (-4096 <= value && value <= 4095) {
 746       or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
 747       return;
 748     }
 749     if (inv_hi22(hi22(value)) == value) {
 750       sethi(addrlit, d);
 751       return;


1441     BREAKPOINT;
1442       ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
1443   }
1444   else {
1445      ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1446   }
1447   assert(false, "DEBUG MESSAGE: %s", msg);
1448 }
1449 
1450 
1451 void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
1452   subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
1453   Label no_extras;
1454   br( negative, true, pt, no_extras ); // if neg, clear reg
 1455   delayed()->set(0, Rresult);          // annulled, so only if taken
1456   bind( no_extras );
1457 }
1458 
1459 
1460 void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {

1461   add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);



1462   bclr(1, Rresult);
1463   sll(Rresult, LogBytesPerWord, Rresult);  // Rresult has total frame bytes
1464 }
1465 
1466 
1467 void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
1468   calc_frame_size(Rextra_words, Rresult);
1469   neg(Rresult);
1470   save(SP, Rresult, SP);
1471 }
1472 
1473 
1474 // ---------------------------------------------------------
1475 Assembler::RCondition cond2rcond(Assembler::Condition c) {
1476   switch (c) {
1477     /*case zero: */
1478     case Assembler::equal:        return Assembler::rc_z;
1479     case Assembler::lessEqual:    return Assembler::rc_lez;
1480     case Assembler::less:         return Assembler::rc_lz;
1481     /*case notZero:*/
1482     case Assembler::notEqual:     return Assembler::rc_nz;
1483     case Assembler::greater:      return Assembler::rc_gz;
1484     case Assembler::greaterEqual: return Assembler::rc_gez;
1485   }
1486   ShouldNotReachHere();
1487   return Assembler::rc_z;
1488 }
1489 
1490 // compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
1491 void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
1492   tst(s1);
1493   br (c, a, p, L);
1494 }
1495 
1496 // Compares a pointer register with zero and branches on null.
1497 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
1498 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
1499   assert_not_delayed();

1500   bpr( rc_z, a, p, s1, L );




1501 }
1502 
1503 void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
1504   assert_not_delayed();

1505   bpr( rc_nz, a, p, s1, L );




1506 }
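         // With the 32-bit paths gone, both null tests reduce to a single bpr,
         // which examines the full 64-bit register; the icc-based tst/br form
         // in cmp_zero_and_br above remains unsuitable for pointers because
         // icc reflects only the low 32 bits of the result.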
1507 
1508 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
1509 
1510 // Compare integer (32 bit) values (icc only).
1511 void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
1512                                       Predict p, Label& L) {
1513   assert_not_delayed();
1514   if (use_cbcond(L)) {
1515     Assembler::cbcond(c, icc, s1, s2, L);
1516   } else {
1517     cmp(s1, s2);
1518     br(c, false, p, L);
1519     delayed()->nop();
1520   }
1521 }
1522 
1523 // Compare integer (32 bit) values (icc only).
1524 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
1525                                       Predict p, Label& L) {


1801   // Rin and Rout are the same and should not be reversed.
1802 
1803   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
1804   srl(Rin_high,     Rcount, Rout_high ); // high half
1805   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
1806   if (Rcount == Rout_low) {
1807     srl(Rin_low, Rcount, Rout_low);
1808   }
1809   ba(done);
1810   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
1811 
1812   // shift >= 32 bits, Ralt_count = Rcount-32
1813   bind(big_shift);
1814 
1815   srl(Rin_high, Ralt_count, Rout_low);
1816   clr(Rout_high);
1817 
1818   bind( done );
1819 }
1820 

1821 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
1822   cmp(Ra, Rb);
1823   mov(-1, Rresult);
1824   movcc(equal,   false, xcc,  0, Rresult);
1825   movcc(greater, false, xcc,  1, Rresult);
1826 }

1827 
1828 
1829 void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
1830   switch (size_in_bytes) {
1831   case  8:  ld_long(src, dst); break;
1832   case  4:  ld(     src, dst); break;
1833   case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
1834   case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
1835   default:  ShouldNotReachHere();
1836   }
1837 }
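         // Usage note: callers pass the field size in bytes, e.g. loading a
         // jchar is load_sized_value(src, dst, 2, false), which lands on the
         // zero-extending lduh case.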
1838 
1839 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
1840   switch (size_in_bytes) {
1841   case  8:  st_long(src, dst); break;
1842   case  4:  st(     src, dst); break;
1843   case  2:  sth(    src, dst); break;
1844   case  1:  stb(    src, dst); break;
1845   default:  ShouldNotReachHere();
1846   }


2605      if (try_bias) {
2606         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2607      }
2608 
2609      // Save Rbox in Rscratch to be used for the cas operation
2610      mov(Rbox, Rscratch);
2611 
2612      // set Rmark to markOop | markOopDesc::unlocked_value
2613      or3(Rmark, markOopDesc::unlocked_value, Rmark);
2614 
2615      // Initialize the box.  (Must happen before we update the object mark!)
2616      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2617 
2618      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
2619      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2620      cas_ptr(mark_addr.base(), Rmark, Rscratch);
2621 
2622      // if compare/exchange succeeded we found an unlocked object and we now have locked it
2623      // hence we are done
2624      cmp(Rmark, Rscratch);

2625      sub(Rscratch, STACK_BIAS, Rscratch);

2626      brx(Assembler::equal, false, Assembler::pt, done);
2627      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
2628 
2629      // we did not find an unlocked object so see if this is a recursive case
2630      // sub(Rscratch, SP, Rscratch);
2631      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2632      andcc(Rscratch, 0xfffff003, Rscratch);
2633      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2634      bind (done);
2635      return ;
2636    }
2637 
2638    Label Egress ;
2639 
2640    if (EmitSync & 256) {
2641       Label IsInflated ;
2642 
2643       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
2644       // Triage: biased, stack-locked, neutral, inflated
2645       if (try_bias) {


2651       // Store mark into displaced mark field in the on-stack basic-lock "box"
2652       // Critically, this must happen before the CAS
2653       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
2654       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2655       andcc(Rmark, 2, G0);
2656       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2657       delayed()->
2658 
2659       // Try stack-lock acquisition.
2660       // Beware: the 1st instruction is in a delay slot
2661       mov(Rbox,  Rscratch);
2662       or3(Rmark, markOopDesc::unlocked_value, Rmark);
2663       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2664       cas_ptr(mark_addr.base(), Rmark, Rscratch);
2665       cmp(Rmark, Rscratch);
2666       brx(Assembler::equal, false, Assembler::pt, done);
2667       delayed()->sub(Rscratch, SP, Rscratch);
2668 
2669       // Stack-lock attempt failed - check for recursive stack-lock.
2670       // See the comments below about how we might remove this case.

2671       sub(Rscratch, STACK_BIAS, Rscratch);

2672       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2673       andcc(Rscratch, 0xfffff003, Rscratch);
2674       br(Assembler::always, false, Assembler::pt, done);
2675       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2676 
2677       bind(IsInflated);
2678       if (EmitSync & 64) {
2679          // If m->owner != null goto IsLocked
2680          // Pessimistic form: Test-and-CAS vs CAS
2681          // The optimistic form avoids RTS->RTO cache line upgrades.
2682          ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
2683          andcc(Rscratch, Rscratch, G0);
2684          brx(Assembler::notZero, false, Assembler::pn, done);
2685          delayed()->nop();
2686          // m->owner == null : it's unlocked.
2687       }
2688 
2689       // Try to CAS m->owner from null to Self
2690       // Invariant: if we acquire the lock then _recursions should be 0.
2691       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);


2733       }
2734       ba(done);
2735       delayed()->st_ptr(Rbox, mark_addr);
2736 
2737       bind(Recursive);
2738       // Stack-lock attempt failed - check for recursive stack-lock.
2739       // Tests show that we can remove the recursive case with no impact
2740       // on refworkload 0.83.  If we need to reduce the size of the code
 2741       // emitted by compiler_lock_object(), the recursive case is a perfect
 2742       // candidate.
2743       //
2744       // A more extreme idea is to always inflate on stack-lock recursion.
2745       // This lets us eliminate the recursive checks in compiler_lock_object
2746       // and compiler_unlock_object and the (box->dhw == 0) encoding.
 2747       // A brief experiment - requiring changes to synchronizer.cpp and the
 2748       // interpreter - showed a performance *increase*.  In the same experiment I eliminated
2749       // the fast-path stack-lock code from the interpreter and always passed
2750       // control to the "slow" operators in synchronizer.cpp.
2751 
 2752       // Rscratch contains the fetched obj->mark value from the failed CAS.

2753       sub(Rscratch, STACK_BIAS, Rscratch);

2754       sub(Rscratch, SP, Rscratch);
2755       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2756       andcc(Rscratch, 0xfffff003, Rscratch);
2757       if (counters != NULL) {
2758         // Accounting needs the Rscratch register
2759         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2760         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2761         ba_short(done);
2762       } else {
2763         ba(done);
2764         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2765       }
2766 
2767       bind   (IsInflated);
2768 
2769       // Try to CAS m->owner from null to Self
2770       // Invariant: if we acquire the lock then _recursions should be 0.
2771       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
2772       mov(G2_thread, Rscratch);
2773       cas_ptr(Rmark, G0, Rscratch);


3651     delayed()->mov(pre_val->after_save(), O0);
3652     restore();
3653   }
3654 
3655   bind(filtered);
3656 }
3657 
3658 static address dirty_card_log_enqueue = 0;
3659 static u_char* dirty_card_log_enqueue_end = 0;
3660 
 3661 // This gets to assume that O0 contains the object address.
3662 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
3663   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
3664   CodeBuffer buf(bb);
3665   MacroAssembler masm(&buf);
3666 #define __ masm.
3667   address start = __ pc();
3668 
3669   Label not_already_dirty, restart, refill, young_card;
3670 

3671   __ srlx(O0, CardTableModRefBS::card_shift, O0);



3672   AddressLiteral addrlit(byte_map_base);
3673   __ set(addrlit, O1); // O1 := <card table base>
3674   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3675 
3676   __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
3677 
3678   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
3679   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
3680 
3681   assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
3682   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
3683 
3684   __ bind(young_card);
3685   // We didn't take the branch, so we're already dirty: return.
3686   // Use return-from-leaf
3687   __ retl();
3688   __ delayed()->nop();
3689 
3690   // Not dirty.
3691   __ bind(not_already_dirty);


3753 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
3754   if (dirty_card_log_enqueue == 0) {
3755     generate_dirty_card_log_enqueue(byte_map_base);
3756     assert(dirty_card_log_enqueue != 0, "postcondition.");
3757   }
3758 }
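         // The enqueue stub is generated lazily, on first use, and the entry
         // point is cached in the file-static dirty_card_log_enqueue, so
         // callers only ever need the address.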
3759 
3760 
3761 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
3762 
3763   Label filtered;
3764   MacroAssembler* post_filter_masm = this;
3765 
3766   if (new_val == G0) return;
3767 
3768   G1SATBCardTableLoggingModRefBS* bs =
3769     barrier_set_cast<G1SATBCardTableLoggingModRefBS>(Universe::heap()->barrier_set());
3770 
3771   if (G1RSBarrierRegionFilter) {
3772     xor3(store_addr, new_val, tmp);

3773     srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);



3774 
3775     // XXX Should I predict this taken or not?  Does it matter?
3776     cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
3777   }
3778 
3779   // If the "store_addr" register is an "in" or "local" register, move it to
3780   // a scratch reg so we can pass it as an argument.
3781   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
3782   // Pick a scratch register different from "tmp".
3783   Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
3784   // Make sure we use up the delay slot!
3785   if (use_scr) {
3786     post_filter_masm->mov(store_addr, scr);
3787   } else {
3788     post_filter_masm->nop();
3789   }
3790   generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
3791   save_frame(0);
3792   call(dirty_card_log_enqueue);
3793   if (use_scr) {

