src/cpu/sparc/vm/assembler_sparc.cpp

  89 const char* Argument::name() const {
  90   int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
  91   int num = number();
  92   if (num >= nofArgs)  num = nofArgs - 1;
  93   return argumentNames[num][is_in() ? 1 : 0];
  94 }
  95 
  96 void Assembler::print_instruction(int inst) {
  97   const char* s;
  98   switch (inv_op(inst)) {
  99   default:         s = "????"; break;
 100   case call_op:    s = "call"; break;
 101   case branch_op:
 102     switch (inv_op2(inst)) {
 103       case fb_op2:     s = "fb";   break;
 104       case fbp_op2:    s = "fbp";  break;
 105       case br_op2:     s = "br";   break;
 106       case bp_op2:     s = "bp";   break;
 107       case cb_op2:     s = "cb";   break;
 108       case bpr_op2: {
 109         if (is_cbc(inst)) {
 110           s = is_cxb(inst) ? "cxb" : "cwb";
 111         } else {
 112           s = "bpr";
 113         }
 114         break;
 115       }
 116       default:         s = "????"; break;
 117     }
 118   }
 119   ::tty->print("%s", s);
 120 }
 121 
 122 
 123 // Patch instruction inst at offset inst_pos to refer to dest_pos
 124 // and return the resulting instruction.
 125 // We should have pcs, not offsets, but since all is relative, it will work out
 126 // OK.
 127 int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
 128 
 129   int m; // mask for displacement field
 130   int v; // new value for displacement field
 131   const int word_aligned_ones = -4;
 132   switch (inv_op(inst)) {
 133   default: ShouldNotReachHere();
 134   case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
 135   case branch_op:
 136     switch (inv_op2(inst)) {
 137       case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 138       case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 139       case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 140       case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 141       case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 142       case bpr_op2: {
 143         if (is_cbc(inst)) {
 144           m = wdisp10(word_aligned_ones, 0);
 145           v = wdisp10(dest_pos, inst_pos);
 146         } else {
 147           m = wdisp16(word_aligned_ones, 0);
 148           v = wdisp16(dest_pos, inst_pos);
 149         }
 150         break;
 151       }
 152       default: ShouldNotReachHere();
 153     }
 154   }
 155   return  inst & ~m  |  v;
 156 }
 157 
 158 // Return the offset of the branch destination of instruction inst
 159 // at offset pos.
 160 // Should have pcs, but since all is relative, it works out.
 161 int Assembler::branch_destination(int inst, int pos) {
 162   int r;
 163   switch (inv_op(inst)) {
 164   default: ShouldNotReachHere();
 165   case call_op:        r = inv_wdisp(inst, pos, 30);  break;
 166   case branch_op:
 167     switch (inv_op2(inst)) {
 168       case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
 169       case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
 170       case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 171       case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 172       case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 173       case bpr_op2: {
 174         if (is_cbc(inst)) {
 175           r = inv_wdisp10(inst, pos);
 176         } else {
 177           r = inv_wdisp16(inst, pos);
 178         }
 179         break;
 180       }
 181       default: ShouldNotReachHere();
 182     }
 183   }
 184   return r;
 185 }
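
patched_branch() clears the displacement field with a mask built from word_aligned_ones and ORs in the new word-scaled, PC-relative value; branch_destination() is its inverse, recovering the target offset. A standalone sketch of that round trip, using a made-up 19-bit word-displacement field (the field layout and helpers are simplified for illustration and are not the real wdisp/inv_wdisp encodings):

#include <assert.h>
#include <stdint.h>

// Hypothetical 19-bit word-displacement field in the low bits of an instruction.
static const int      DISP_BITS = 19;
static const uint32_t DISP_MASK = (1u << DISP_BITS) - 1;

// Analogue of patched_branch: clear the old displacement, OR in the new one.
static uint32_t patch_disp(uint32_t inst, int dest_pos, int inst_pos) {
  uint32_t v = (uint32_t)((dest_pos - inst_pos) / 4) & DISP_MASK;
  return (inst & ~DISP_MASK) | v;
}

// Analogue of branch_destination: recover the destination offset from inst.
static int disp_destination(uint32_t inst, int inst_pos) {
  uint32_t raw = inst & DISP_MASK;
  int32_t  d   = (int32_t)raw;
  if (raw & (1u << (DISP_BITS - 1)))  d -= (int32_t)(1u << DISP_BITS);  // sign-extend
  return inst_pos + d * 4;
}

int main(void) {
  uint32_t inst = 0x30480000;                                          // opcode bits only, displacement zero
  assert(disp_destination(patch_disp(inst, 256,  64),  64) == 256);    // forward branch
  assert(disp_destination(patch_disp(inst,   0, 128), 128) ==   0);    // backward branch
  return 0;
}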
 186 
 187 int AbstractAssembler::code_fill_byte() {
 188   return 0x00;                  // illegal instruction 0x00000000
 189 }
 190 
 191 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
 192   switch (in) {
 193   case rc_z:   return equal;
 194   case rc_lez: return lessEqual;


 974   }
 975 }
 976 
 977 
 978 // %%% maybe get rid of [re]set_last_Java_frame
 979 void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
 980   assert_not_delayed();
 981   Address flags(G2_thread, JavaThread::frame_anchor_offset() +
 982                            JavaFrameAnchor::flags_offset());
 983   Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
 984 
 985   // Always set last_Java_pc and flags first because once last_Java_sp is visible
 986   // has_last_Java_frame is true and users will look at the rest of the fields.
 987   // (Note: flags should always be zero before we get here so it doesn't need to be set.)
 988 
 989 #ifdef ASSERT
 990   // Verify that last_Java_pc was zeroed on return to Java
 991   Label PcOk;
 992   save_frame(0);                // to avoid clobbering O0
 993   ld_ptr(pc_addr, L0);
 994   br_null(L0, false, Assembler::pt, PcOk);
 995   stop("last_Java_pc not zeroed before leaving Java");
 996   bind(PcOk);
 997 
 998   // Verify that flags was zeroed on return to Java
 999   Label FlagsOk;
1000   ld(flags, L0);
1001   tst(L0);
1002   br(Assembler::zero, false, Assembler::pt, FlagsOk);
1003   delayed() -> restore();
1004   stop("flags not zeroed before leaving Java");
1005   bind(FlagsOk);
1006 #endif /* ASSERT */
1007   //
1008   // When returning from calling out from Java mode the frame anchor's last_Java_pc
1009   // will always be set to NULL. It is set here so that if we are doing a call to
 1010   // native (not VM) we capture the known pc and don't have to rely on the
1011   // native call having a standard frame linkage where we can find the pc.
1012 
1013   if (last_Java_pc->is_valid()) {
1014     st_ptr(last_Java_pc, pc_addr);


1099   set(badHeapWordVal, G3);
1100   set(badHeapWordVal, G4);
1101   set(badHeapWordVal, G5);
1102 #endif
1103 
1104   // get oop result if there is one and reset the value in the thread
1105   if (oop_result->is_valid()) {
1106     get_vm_result(oop_result);
1107   }
1108 }
1109 
1110 void MacroAssembler::check_and_forward_exception(Register scratch_reg)
1111 {
1112   Label L;
1113 
1114   check_and_handle_popframe(scratch_reg);
1115   check_and_handle_earlyret(scratch_reg);
1116 
1117   Address exception_addr(G2_thread, Thread::pending_exception_offset());
1118   ld_ptr(exception_addr, scratch_reg);
 1119   br_null(scratch_reg, false, pt, L);
1120   // we use O7 linkage so that forward_exception_entry has the issuing PC
1121   call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1122   delayed()->nop();
1123   bind(L);
1124 }
1125 
1126 
1127 void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
1128 }
1129 
1130 
1131 void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
1132 }
1133 
1134 
1135 void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1136   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
1137 }
1138 
1139 


1873     Register O2_adr   = O2;
1874     Register O3_accum = O3;
1875     inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
1876   }
1877 
1878   Register O2_mask = O2;
1879   Register O3_bits = O3;
1880   Register O4_temp = O4;
1881 
1882   // mark lower end of faulting range
1883   assert(_verify_oop_implicit_branch[0] == NULL, "set once");
1884   _verify_oop_implicit_branch[0] = pc();
1885 
1886   // We can't check the mark oop because it could be in the process of
1887   // locking or unlocking while this is running.
1888   set(Universe::verify_oop_mask (), O2_mask);
1889   set(Universe::verify_oop_bits (), O3_bits);
1890 
1891   // assert((obj & oop_mask) == oop_bits);
1892   and3(O0_obj, O2_mask, O4_temp);
1893   cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, null_or_fail);
1894 
1895   if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
1896     // the null_or_fail case is useless; must test for null separately
1897     br_null(O0_obj, false, pn, succeed);
1898   }
1899 
1900   // Check the klassOop of this object for being in the right area of memory.
 1901   // Cannot do the load in the delay slot above in case O0 is null
1902   load_klass(O0_obj, O0_obj);
1903   // assert((klass & klass_mask) == klass_bits);
1904   if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
1905     set(Universe::verify_klass_mask(), O2_mask);
1906   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
1907     set(Universe::verify_klass_bits(), O3_bits);
1908   and3(O0_obj, O2_mask, O4_temp);
1909   cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, fail);
1910   // Check the klass's klass
1911   load_klass(O0_obj, O0_obj);
1912   and3(O0_obj, O2_mask, O4_temp);
1913   cmp(O4_temp, O3_bits);
1914   brx(notEqual, false, pn, fail);
1915   delayed()->wrccr( O5_save_flags ); // Restore CCR's
1916 
1917   // mark upper end of faulting range
1918   _verify_oop_implicit_branch[1] = pc();
1919 
1920   //-----------------------
1921   // all tests pass
1922   bind(succeed);
1923 
1924   // Restore prior 64-bit registers
1925   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
1926   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
1927   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
1928   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
1929   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);


2117 }
2118 
2119 
2120 // ---------------------------------------------------------
2121 Assembler::RCondition cond2rcond(Assembler::Condition c) {
2122   switch (c) {
2123     /*case zero: */
2124     case Assembler::equal:        return Assembler::rc_z;
2125     case Assembler::lessEqual:    return Assembler::rc_lez;
2126     case Assembler::less:         return Assembler::rc_lz;
2127     /*case notZero:*/
2128     case Assembler::notEqual:     return Assembler::rc_nz;
2129     case Assembler::greater:      return Assembler::rc_gz;
2130     case Assembler::greaterEqual: return Assembler::rc_gez;
2131   }
2132   ShouldNotReachHere();
2133   return Assembler::rc_z;
2134 }
2135 
2136 // compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
2137 void MacroAssembler::br_zero(Register s1, Label& L) {
2138   assert_not_delayed();
2139   if (use_cbc(L)) {
2140     Assembler::cbc(zero, icc, s1, 0, L);
2141   } else {
2142     tst(s1);
2143     br (zero, false, pt, L);
2144     delayed()->nop();
2145   }
2146 }
2147 
2148 // Compares a pointer register with zero and branches on null.
2149 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
2150 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
2151   assert_not_delayed();
2152   if (emit_delayed_nop && use_cbc(L)) {
2153     Assembler::cbc(zero, ptr_cc, s1, 0, L);
2154     return;
2155   }
2156 #ifdef _LP64
2157   bpr( rc_z, a, p, s1, L );
2158 #else
2159   tst(s1);
2160   br ( zero, a, p, L );
2161 #endif
2162   // Some callers can fill the delay slot.
2163   if (emit_delayed_nop) {
2164     delayed()->nop();
2165   }


2211   }
2212   // Some callers can fill the delay slot.
2213   if (emit_delayed_nop) {
2214     delayed()->nop();
2215   }
2216 }
2217 
2218 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
2219 void MacroAssembler::cmp_and_br(Register s1, Register s2, Condition c,
2220                                 bool a, Predict p, Label& L) {
2221   assert_not_delayed();
2222   if (use_cbc(L)) {
2223     Assembler::cbc(c, icc, s1, s2, L);
2224   } else {
2225     cmp(s1, s2);
2226     br(c, a, p, L);
2227     delayed()->nop();
2228   }
2229 }
2230 
2231 void MacroAssembler::cmp_and_br(Register s1, int simm13a, Condition c,
2232                                 bool a, Predict p, Label& L) {

2233   assert_not_delayed();
2234   if (is_simm(simm13a,5) && use_cbc(L)) {
2235     Assembler::cbc(c, icc, s1, simm13a, L);
2236   } else {
2237     cmp(s1, simm13a);
2238     br(c, a, p, L);
2239     delayed()->nop();
2240   }
2241 }
2242 
2243 // Branch that tests xcc in LP64 and icc in !LP64
2244 void MacroAssembler::cmp_and_brx(Register s1, Register s2, Condition c,
2245                                  bool a, Predict p, Label& L) {
2246   assert_not_delayed();
2247   if (use_cbc(L)) {
2248     Assembler::cbc(c, ptr_cc, s1, s2, L);
2249   } else {
2250     cmp(s1, s2);
2251     brx(c, a, p, L);
2252     delayed()->nop();
2253   }
2254 }
2255 
2256 void MacroAssembler::cmp_and_brx(Register s1, int simm13a, Condition c,
2257                                  bool a, Predict p, Label& L) {

2258   assert_not_delayed();
2259   if (is_simm(simm13a,5) && use_cbc(L)) {
2260     Assembler::cbc(c, ptr_cc, s1, simm13a, L);
2261   } else {
2262     cmp(s1, simm13a);
2263     brx(c, a, p, L);
2264     delayed()->nop();
2265   }
2266 }
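
The immediate variants above take the compact-branch form only when the constant fits cbcond's 5-bit signed immediate, via is_simm(simm13a, 5). A minimal standalone sketch of that range check, with the same semantics as the is_simm used above (illustrative, not the HotSpot declaration):

#include <assert.h>

// Does the value fit in an nbits-wide signed immediate field?
// For cbcond, nbits is 5, so the accepted range is -16..15.
static bool is_simm(long x, int nbits) {
  long limit = 1L << (nbits - 1);
  return -limit <= x && x < limit;
}

int main(void) {
  assert( is_simm( 15, 5) && is_simm(-16, 5));
  assert(!is_simm( 16, 5) && !is_simm(-17, 5));
  return 0;
}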
2267 
2268 // instruction sequences factored across compiler & interpreter
2269 
2270 
2271 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
2272                            Register Rb_hi, Register Rb_low,
2273                            Register Rresult) {
2274 
2275   Label check_low_parts, done;
2276 
2277   cmp(Ra_hi, Rb_hi );  // compare hi parts
2278   br(equal, true, pt, check_low_parts);
2279   delayed()->cmp(Ra_low, Rb_low); // test low parts
2280 
2281   // And, with an unsigned comparison, it does not matter if the numbers
2282   // are negative or not.
2283   // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
2284   // The second one is bigger (unsignedly).
2285 
2286   // Other notes:  The first move in each triplet can be unconditional
2287   // (and therefore probably prefetchable).
2288   // And the equals case for the high part does not need testing,
2289   // since that triplet is reached only after finding the high halves differ.
2290 
2291   if (VM_Version::v9_instructions_work()) {
2292     mov(-1, Rresult);
2293     ba(done, false);  delayed()-> movcc(greater, false, icc,  1, Rresult);
2294   } else {
2295     br(less,    true, pt, done); delayed()-> set(-1, Rresult);
2296     br(greater, true, pt, done); delayed()-> set( 1, Rresult);
2297   }
2298 
2299   bind( check_low_parts );
2300 
2301   if (VM_Version::v9_instructions_work()) {
2302     mov(                               -1, Rresult);
2303     movcc(equal,           false, icc,  0, Rresult);
2304     movcc(greaterUnsigned, false, icc,  1, Rresult);
2305   } else {
2306     set(-1, Rresult);
2307     br(equal,           true, pt, done); delayed()->set( 0, Rresult);
2308     br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
2309   }
2310   bind( done );
2311 }
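
The comment above argues that once the high halves are known to differ or to be equal, the low halves can be compared unsigned regardless of sign. A standalone C++ sketch of the same strategy, with the -2 vs -1 case from the comment as a test (illustrative only; assumes the usual arithmetic right shift of negative values):

#include <assert.h>
#include <stdint.h>

// Compare the signed high halves first; only if they are equal fall back to an
// unsigned comparison of the low halves, which is correct regardless of sign.
static int lcmp_by_halves(int64_t a, int64_t b) {
  int32_t  a_hi = (int32_t)(a >> 32), b_hi = (int32_t)(b >> 32);
  uint32_t a_lo = (uint32_t)a,        b_lo = (uint32_t)b;
  if (a_hi != b_hi)  return a_hi < b_hi ? -1 : 1;
  if (a_lo != b_lo)  return a_lo < b_lo ? -1 : 1;
  return 0;
}

int main(void) {
  assert(lcmp_by_halves(-2, -1) == -1);   // lows 0xfffffffe vs 0xffffffff
  assert(lcmp_by_halves( 1, -1) ==  1);
  assert(lcmp_by_halves( 5,  5) ==  0);
  return 0;
}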
2312 
2313 void MacroAssembler::lneg( Register Rhi, Register Rlow ) {


2348 
2349   // We get the transfer bits by shifting right by 32-count the low
2350   // register. This is done by shifting right by 31-count and then by one
2351   // more to take care of the special (rare) case where count is zero
2352   // (shifting by 32 would not work).
2353 
2354   neg(Ralt_count);
2355 
2356   // The order of the next two instructions is critical in the case where
2357   // Rin and Rout are the same and should not be reversed.
2358 
2359   srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
2360   if (Rcount != Rout_low) {
2361     sll(Rin_low, Rcount, Rout_low); // low half
2362   }
2363   sll(Rin_high, Rcount, Rout_high);
2364   if (Rcount == Rout_low) {
2365     sll(Rin_low, Rcount, Rout_low); // low half
2366   }
2367   srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
2368   ba(done, false);
2369   delayed()->or3(Rout_high, Rxfer_bits, Rout_high);   // new hi value: or in shifted old hi part and xfer from low
2370 
2371   // shift >= 32 bits, Ralt_count = Rcount-32
2372   bind(big_shift);
2373   sll(Rin_low, Ralt_count, Rout_high  );
2374   clr(Rout_low);
2375 
2376   bind(done);
2377 }
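
The transfer-bit trick described above (shift by 31-count and then by one more, so that count == 0 never requires a shift by 32) can be checked in isolation. A standalone sketch of the same composition over 32-bit halves (illustrative, not the emitted code):

#include <assert.h>
#include <stdint.h>

// For count < 32 the new high half is (hi << n) | (lo >> (32 - n)); the
// >> (32 - n) is done as >> (31 - n) followed by >> 1 so that n == 0 never
// asks for a shift by 32.  For count >= 32 the low half moves into the high
// half and the low half becomes zero, as in the big_shift case above.
static uint64_t lshl_by_halves(uint32_t hi, uint32_t lo, unsigned n) {
  if (n >= 32) {
    return (uint64_t)(lo << (n - 32)) << 32;
  }
  uint32_t xfer   = (lo >> (31 - n)) >> 1;     // transfer bits, low -> high
  uint32_t out_hi = (hi << n) | xfer;
  uint32_t out_lo = lo << n;
  return ((uint64_t)out_hi << 32) | out_lo;
}

int main(void) {
  const uint64_t v = 0x123456789abcdef0ULL;
  for (unsigned n = 0; n < 64; n++) {
    assert(lshl_by_halves((uint32_t)(v >> 32), (uint32_t)v, n) == (v << n));
  }
  return 0;
}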
2378 
2379 
2380 void MacroAssembler::lshr( Register Rin_high,  Register Rin_low,
2381                            Register Rcount,
2382                            Register Rout_high, Register Rout_low,
2383                            Register Rtemp ) {
2384 
2385   Register Ralt_count = Rtemp;
2386   Register Rxfer_bits = Rtemp;
2387 
2388   assert( Ralt_count != Rin_high


2409 
2410   // We get the transfer bits by shifting left by 32-count the high
2411   // register. This is done by shifting left by 31-count and then by one
2412   // more to take care of the special (rare) case where count is zero
2413   // (shifting by 32 would not work).
2414 
2415   neg(Ralt_count);
2416   if (Rcount != Rout_low) {
2417     srl(Rin_low, Rcount, Rout_low);
2418   }
2419 
2420   // The order of the next two instructions is critical in the case where
2421   // Rin and Rout are the same and should not be reversed.
2422 
2423   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2424   sra(Rin_high,     Rcount, Rout_high ); // high half
2425   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
2426   if (Rcount == Rout_low) {
2427     srl(Rin_low, Rcount, Rout_low);
2428   }
2429   ba(done, false);
2430   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2431 
2432   // shift >= 32 bits, Ralt_count = Rcount-32
2433   bind(big_shift);
2434 
2435   sra(Rin_high, Ralt_count, Rout_low);
2436   sra(Rin_high,         31, Rout_high); // sign into hi
2437 
2438   bind( done );
2439 }
2440 
2441 
2442 
2443 void MacroAssembler::lushr( Register Rin_high,  Register Rin_low,
2444                             Register Rcount,
2445                             Register Rout_high, Register Rout_low,
2446                             Register Rtemp ) {
2447 
2448   Register Ralt_count = Rtemp;
2449   Register Rxfer_bits = Rtemp;


2472 
2473   // We get the transfer bits by shifting left by 32-count the high
2474   // register. This is done by shifting left by 31-count and then by one
2475   // more to take care of the special (rare) case where count is zero
2476   // (shifting by 32 would not work).
2477 
2478   neg(Ralt_count);
2479   if (Rcount != Rout_low) {
2480     srl(Rin_low, Rcount, Rout_low);
2481   }
2482 
2483   // The order of the next two instructions is critical in the case where
2484   // Rin and Rout are the same and should not be reversed.
2485 
2486   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2487   srl(Rin_high,     Rcount, Rout_high ); // high half
2488   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
2489   if (Rcount == Rout_low) {
2490     srl(Rin_low, Rcount, Rout_low);
2491   }
2492   ba(done, false);
2493   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2494 
2495   // shift >= 32 bits, Ralt_count = Rcount-32
2496   bind(big_shift);
2497 
2498   srl(Rin_high, Ralt_count, Rout_low);
2499   clr(Rout_high);
2500 
2501   bind( done );
2502 }
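
The two right-shift routines above use the same transfer-bit trick with the halves swapped. A standalone sketch of the unsigned case (lushr); lshr differs only in filling the vacated high half with copies of the sign bit (illustrative only):

#include <assert.h>
#include <stdint.h>

// Bits move from the high half into the low half via the same 31-then-1 trick.
static uint64_t lushr_by_halves(uint32_t hi, uint32_t lo, unsigned n) {
  if (n >= 32) {                              // big_shift: high half is cleared
    return hi >> (n - 32);
  }
  uint32_t xfer   = (hi << (31 - n)) << 1;    // transfer bits, high -> low
  uint32_t out_lo = (lo >> n) | xfer;
  uint32_t out_hi = hi >> n;
  return ((uint64_t)out_hi << 32) | out_lo;
}

int main(void) {
  const uint64_t v = 0xfedcba9876543210ULL;
  for (unsigned n = 0; n < 64; n++) {
    assert(lushr_by_halves((uint32_t)(v >> 32), (uint32_t)v, n) == (v >> n));
  }
  return 0;
}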
2503 
2504 #ifdef _LP64
2505 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
2506   cmp(Ra, Rb);
2507   mov(-1, Rresult);
2508   movcc(equal,   false, xcc,  0, Rresult);
2509   movcc(greater, false, xcc,  1, Rresult);
2510 }
2511 #endif
2512 


2736     if (top_reg_after_save == L1) {
2737       ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
2738     }
2739 
2740     if (ptr_reg_after_save == L2) {
2741       ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
2742     }
2743 
 2744     Label retry_get_lock;
 2745     Label not_same;
 2746     Label dont_yield;
2747 
2748     assert(lock_addr, "lock_address should be non null for v8");
2749     set((intptr_t)lock_addr, lock_ptr_reg);
2750     // Initialize yield counter
2751     mov(G0,yield_reg);
2752     mov(G0, yieldall_reg);
2753     set(StubRoutines::Sparc::locked, lock_reg);
2754 
2755     bind(retry_get_lock);
2756     cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dont_yield);
2757 
2758     if(use_call_vm) {
 2759       Untested("Need to verify global reg consistency");
2760       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
2761     } else {
2762       // Save the regs and make space for a C call
2763       save(SP, -96, SP);
2764       save_all_globals_into_locals();
2765       call(CAST_FROM_FN_PTR(address,os::yield_all));
2766       delayed()->mov(yieldall_reg, O0);
2767       restore_globals_from_locals();
2768       restore();
2769     }
2770 
2771     // reset the counter
2772     mov(G0,yield_reg);
2773     add(yieldall_reg, 1, yieldall_reg);
2774 
2775     bind(dont_yield);
2776     // try to get lock
2777     swap(lock_ptr_reg, 0, lock_reg);
2778 
2779     // did we get the lock?
2780     cmp(lock_reg, StubRoutines::Sparc::unlocked);
2781     br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
2782     delayed()->add(yield_reg,1,yield_reg);
2783 
2784     // yes, got lock.  do we have the same top?
2785     ld(top_ptr_reg_after_save, 0, value_reg);
2786     cmp_and_br(value_reg, top_reg_after_save, Assembler::notEqual, false, Assembler::pt, not_same);
2787 
2788     // yes, same top.
2789     st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
2790     membar(Assembler::StoreStore);
2791 
2792     bind(not_same);
2793     mov(value_reg, ptr_reg_after_save);
2794     st(lock_reg, lock_ptr_reg, 0); // unlock
2795 
2796     restore();
2797   }
2798 }
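
The loop above spins on a swap of the lock word, yielding after V8AtomicOperationUnderLockSpinCount failed attempts and then resetting the counter. A rough standalone analogue of that retry/yield shape using std::atomic (the names and the exchange-based lock are illustrative, not the StubRoutines lock):

#include <atomic>
#include <thread>

static std::atomic<int> lock_word(0);     // 0 = unlocked, 1 = locked
static const int SpinCount = 100;         // stand-in for the spin-count limit

static void spin_lock_with_yield() {
  int spins = 0;
  // exchange() plays the role of the swap instruction: it unconditionally
  // stores "locked" and tells us what was there before.
  while (lock_word.exchange(1, std::memory_order_acquire) != 0) {
    if (++spins >= SpinCount) {           // the "dont_yield" branch not taken
      std::this_thread::yield();
      spins = 0;                          // reset the counter, as above
    }
  }
}

static void spin_unlock() {
  lock_word.store(0, std::memory_order_release);
}

int main() {
  spin_lock_with_yield();
  spin_unlock();
  return 0;
}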
2799 
2800 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2801                                                       Register tmp,
2802                                                       int offset) {
2803   intptr_t value = *delayed_value_addr;
2804   if (value != 0)
2805     return RegisterOrConstant(value + offset);
2806 


3016                                          Register super_klass,
3017                                          Register temp_reg,
3018                                          Register temp2_reg,
3019                                          Label& L_success) {
3020   Label L_failure, L_pop_to_failure;
3021   check_klass_subtype_fast_path(sub_klass, super_klass,
3022                                 temp_reg, temp2_reg,
3023                                 &L_success, &L_failure, NULL);
3024   Register sub_2 = sub_klass;
3025   Register sup_2 = super_klass;
3026   if (!sub_2->is_global())  sub_2 = L0;
3027   if (!sup_2->is_global())  sup_2 = L1;
3028 
3029   save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
3030   check_klass_subtype_slow_path(sub_2, sup_2,
3031                                 L2, L3, L4, L5,
3032                                 NULL, &L_pop_to_failure);
3033 
3034   // on success:
3035   restore();
3036   ba(L_success);
3037 
3038   // on failure:
3039   bind(L_pop_to_failure);
3040   restore();
3041   bind(L_failure);
3042 }
3043 
3044 
3045 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3046                                                    Register super_klass,
3047                                                    Register temp_reg,
3048                                                    Register temp2_reg,
3049                                                    Label* L_success,
3050                                                    Label* L_failure,
3051                                                    Label* L_slow_path,
3052                                         RegisterOrConstant super_check_offset) {
3053   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
3054                    Klass::secondary_super_cache_offset_in_bytes());
3055   int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
3056                     Klass::super_check_offset_offset_in_bytes());


3095     // super_check_offset is register.
3096     assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
3097   }
3098   ld_ptr(sub_klass, super_check_offset, temp_reg);
3099   cmp(super_klass, temp_reg);
3100 
3101   // This check has worked decisively for primary supers.
3102   // Secondary supers are sought in the super_cache ('super_cache_addr').
3103   // (Secondary supers are interfaces and very deeply nested subtypes.)
 3104   // This is handled by the same check above because of a tricky aliasing
3105   // between the super_cache and the primary super display elements.
3106   // (The 'super_check_addr' can address either, as the case requires.)
3107   // Note that the cache is updated below if it does not help us find
3108   // what we need immediately.
3109   // So if it was a primary super, we can just fail immediately.
3110   // Otherwise, it's the slow path for us (no success at this point).
3111 
3112   // Hacked ba(), which may only be used just before L_fallthrough.
3113 #define FINAL_JUMP(label)            \
3114   if (&(label) != &L_fallthrough) {  \
3115     ba(label, false);                \
3116     delayed()->nop();                \
3117   }
3118 
3119   if (super_check_offset.is_register()) {
3120     brx(Assembler::equal, false, Assembler::pn, *L_success);
3121     delayed()->cmp(super_check_offset.as_register(), sc_offset);
3122 
3123     if (L_failure == &L_fallthrough) {
3124       brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
3125       delayed()->nop();
3126     } else {
3127       brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
3128       delayed()->nop();
3129       FINAL_JUMP(*L_slow_path);
3130     }
3131   } else if (super_check_offset.as_constant() == sc_offset) {
3132     // Need a slow path; fast failure is impossible.
3133     if (L_slow_path == &L_fallthrough) {
3134       brx(Assembler::equal, false, Assembler::pt, *L_success);
3135       delayed()->nop();
3136     } else {


3228     // Don't use load_heap_oop; we don't want to decode the element.
3229     lduw(   scan_temp, elem_offset, scratch_reg );
3230   } else {
3231     ld_ptr( scan_temp, elem_offset, scratch_reg );
3232   }
3233 
3234   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
3235   cmp(scratch_reg, search_key);
3236 
3237   // A miss means we are NOT a subtype and need to keep looping
3238   brx(Assembler::notEqual, false, Assembler::pn, L_loop);
3239   delayed()->deccc(count_temp); // decrement trip counter in delay slot
3240 
3241   // Falling out the bottom means we found a hit; we ARE a subtype
3242   if (decode_super_klass) decode_heap_oop(super_klass);
3243 
3244   // Success.  Cache the super we found and proceed in triumph.
3245   st_ptr(super_klass, sub_klass, sc_offset);
3246 
3247   if (L_success != &L_fallthrough) {
3248     ba(*L_success, false);
3249     delayed()->nop();
3250   }
3251 
3252   bind(L_fallthrough);
3253 }
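
The loop that "falls out the bottom" above is a linear scan of the secondary-supers overflow list; a hit is cached in the super cache (the st_ptr to sc_offset) so the fast-path check wins next time. A toy standalone model of that scan-and-cache behaviour (the types and field names are invented for illustration):

#include <assert.h>
#include <stddef.h>

struct FakeKlass {
  const FakeKlass* secondary_supers[4];   // the "overflow list" being scanned
  size_t           secondary_supers_len;
  const FakeKlass* secondary_super_cache; // updated on a successful scan
};

static bool is_secondary_subtype(FakeKlass* sub, const FakeKlass* super) {
  for (size_t i = 0; i < sub->secondary_supers_len; i++) {
    if (sub->secondary_supers[i] == super) {
      sub->secondary_super_cache = super; // "Cache the super we found"
      return true;                        // falling out the bottom == a hit
    }
  }
  return false;
}

int main() {
  FakeKlass iface = { {}, 0, NULL };
  FakeKlass sub   = { { &iface }, 1, NULL };
  assert(is_secondary_subtype(&sub, &iface));
  assert(sub.secondary_super_cache == &iface);
  return 0;
}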
3254 
3255 
3256 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
3257                                               Register temp_reg,
3258                                               Label& wrong_method_type) {
3259   assert_different_registers(mtype_reg, mh_reg, temp_reg);
3260   // compare method type against that of the receiver
3261   RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
3262   load_heap_oop(mh_reg, mhtype_offset, temp_reg);
3263   cmp_and_brx(temp_reg, mtype_reg, Assembler::notEqual, false, Assembler::pn, wrong_method_type);
3264 }
3265 
3266 
3267 // A method handle has a "vmslots" field which gives the size of its
3268 // argument list in JVM stack slots.  This field is either located directly
3269 // in every method handle, or else is indirectly accessed through the
3270 // method handle's MethodType.  This macro hides the distinction.
3271 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
3272                                                 Register temp_reg) {
3273   assert_different_registers(vmslots_reg, mh_reg, temp_reg);
3274   // load mh.type.form.vmslots
3275   if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
3276     // hoist vmslots into every mh to avoid dependent load chain
3277     ld(           Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)),   vmslots_reg);
3278   } else {
3279     Register temp2_reg = vmslots_reg;
3280     load_heap_oop(Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)),      temp2_reg);
3281     load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)),        temp2_reg);
3282     ld(           Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3283   }


3336                                           Register temp_reg,
3337                                           Label& done, Label* slow_case,
3338                                           BiasedLockingCounters* counters) {
3339   assert(UseBiasedLocking, "why call this otherwise?");
3340 
3341   if (PrintBiasedLockingStatistics) {
3342     assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
3343     if (counters == NULL)
3344       counters = BiasedLocking::counters();
3345   }
3346 
3347   Label cas_label;
3348 
3349   // Biased locking
3350   // See whether the lock is currently biased toward our thread and
3351   // whether the epoch is still valid
3352   // Note that the runtime guarantees sufficient alignment of JavaThread
3353   // pointers to allow age to be placed into low bits
3354   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
3355   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3356   cmp_and_brx(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, false, Assembler::pn, cas_label);
3357 
3358   load_klass(obj_reg, temp_reg);
3359   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3360   or3(G2_thread, temp_reg, temp_reg);
3361   xor3(mark_reg, temp_reg, temp_reg);
3362   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
3363   if (counters != NULL) {
3364     cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
3365     // Reload mark_reg as we may need it later
3366     ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
3367   }
3368   brx(Assembler::equal, true, Assembler::pt, done);
3369   delayed()->nop();
3370 
3371   Label try_revoke_bias;
3372   Label try_rebias;
3373   Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
3374   assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3375 
3376   // At this point we know that the header has the bias pattern and


3403   // fails we will go in to the runtime to revoke the object's bias.
3404   // Note that we first construct the presumed unbiased header so we
3405   // don't accidentally blow away another thread's valid bias.
3406   delayed()->and3(mark_reg,
3407                   markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
3408                   mark_reg);
3409   or3(G2_thread, mark_reg, temp_reg);
3410   casn(mark_addr.base(), mark_reg, temp_reg);
3411   // If the biasing toward our thread failed, this means that
3412   // another thread succeeded in biasing it toward itself and we
3413   // need to revoke that bias. The revocation will occur in the
3414   // interpreter runtime in the slow case.
3415   cmp(mark_reg, temp_reg);
3416   if (counters != NULL) {
3417     cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
3418   }
3419   if (slow_case != NULL) {
3420     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3421     delayed()->nop();
3422   }
3423   ba(done);
3424 
3425   bind(try_rebias);
3426   // At this point we know the epoch has expired, meaning that the
3427   // current "bias owner", if any, is actually invalid. Under these
3428   // circumstances _only_, we are allowed to use the current header's
3429   // value as the comparison value when doing the cas to acquire the
3430   // bias in the current epoch. In other words, we allow transfer of
3431   // the bias from one thread to another directly in this situation.
3432   //
3433   // FIXME: due to a lack of registers we currently blow away the age
3434   // bits in this situation. Should attempt to preserve them.
3435   load_klass(obj_reg, temp_reg);
3436   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3437   or3(G2_thread, temp_reg, temp_reg);
3438   casn(mark_addr.base(), mark_reg, temp_reg);
3439   // If the biasing toward our thread failed, this means that
3440   // another thread succeeded in biasing it toward itself and we
3441   // need to revoke that bias. The revocation will occur in the
3442   // interpreter runtime in the slow case.
3443   cmp(mark_reg, temp_reg);
3444   if (counters != NULL) {
3445     cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
3446   }
3447   if (slow_case != NULL) {
3448     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3449     delayed()->nop();
3450   }
3451   ba(done);
3452 
3453   bind(try_revoke_bias);
3454   // The prototype mark in the klass doesn't have the bias bit set any
3455   // more, indicating that objects of this data type are not supposed
3456   // to be biased any more. We are going to try to reset the mark of
3457   // this object to the prototype value and fall through to the
3458   // CAS-based locking scheme. Note that if our CAS fails, it means
3459   // that another thread raced us for the privilege of revoking the
3460   // bias of this particular object, so it's okay to continue in the
3461   // normal locking code.
3462   //
3463   // FIXME: due to a lack of registers we currently blow away the age
3464   // bits in this situation. Should attempt to preserve them.
3465   load_klass(obj_reg, temp_reg);
3466   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3467   casn(mark_addr.base(), mark_reg, temp_reg);
3468   // Fall through to the normal CAS-based lock, because no matter what
3469   // the result of the above CAS, some thread must have succeeded in
3470   // removing the bias bit from the object's header.
3471   if (counters != NULL) {


3482   // Note: we do not have to check the thread ID for two reasons.
3483   // First, the interpreter checks for IllegalMonitorStateException at
3484   // a higher level. Second, if the bias was revoked while we held the
3485   // lock, the object could not be rebiased toward another thread, so
3486   // the bias bit would be clear.
3487   ld_ptr(mark_addr, temp_reg);
3488   and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3489   cmp(temp_reg, markOopDesc::biased_lock_pattern);
3490   brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
3491   delayed();
3492   if (!allow_delay_slot_filling) {
3493     nop();
3494   }
3495 }
3496 
3497 
3498 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
3499 // Solaris/SPARC's "as".  Another apt name would be cas_ptr()
3500 
3501 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
3502   casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ;
3503 }
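
As the comment says, casn() is simply a pointer-width compare-and-swap (cas_ptr would be an equally apt name). For readers more at home in portable C++, a standalone analogue using std::atomic shows the contract it provides; this is illustrative only, since HotSpot's version goes through casx/casx_under_lock rather than std::atomic:

#include <atomic>
#include <cassert>
#include <stdint.h>

// A pointer-width CAS: if *addr still holds cmp, replace it with set and
// report success; otherwise leave it alone and report failure.
static bool cas_ptr(std::atomic<intptr_t>* addr, intptr_t cmp, intptr_t set) {
  return addr->compare_exchange_strong(cmp, set);
}

int main() {
  std::atomic<intptr_t> word(0);
  assert( cas_ptr(&word, 0, 42));   // succeeds: 0 -> 42
  assert(!cas_ptr(&word, 0,  7));   // fails: word is 42, not 0
  assert(word.load() == 42);
  return 0;
}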
3504 
3505 
3506 
3507 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
3508 // of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
3509 // The code could be tightened up considerably.
3510 //
3511 // box->dhw disposition - post-conditions at DONE_LABEL.
3512 // -   Successful inflated lock:  box->dhw != 0.
3513 //     Any non-zero value suffices.
3514 //     Consider G2_thread, rsp, boxReg, or unused_mark()
3515 // -   Successful Stack-lock: box->dhw == mark.
3516 //     box->dhw must contain the displaced mark word value
3517 // -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
3518 //     The slow-path fast_enter() and slow_enter() operators
3519 //     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
3520 // -   Biased: box->dhw is undefined
3521 //
 3522 // SPARC refworkload performance - specifically jetstream and scimark - is
 3523 // extremely sensitive to the size of the code emitted by compiler_lock_object
 3524 // and compiler_unlock_object.  Critically, the key factor is code size, not path
 3525 // length.  (Simple experiments padding CLO with unexecuted NOPs demonstrate the
 3526 // effect).
3527 
3528 
3529 void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
3530                                           Register Rbox, Register Rscratch,
3531                                           BiasedLockingCounters* counters,
3532                                           bool try_bias) {
3533    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3534 
3535    verify_oop(Roop);
3536    Label done ;
3537 
3538    if (counters != NULL) {
3539      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
3540    }
3541 
3542    if (EmitSync & 1) {
3543      mov    (3, Rscratch) ;
3544      st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3545      cmp    (SP, G0) ;
3546      return ;
3547    }
3548 
3549    if (EmitSync & 2) {
3550 
3551      // Fetch object's markword
3552      ld_ptr(mark_addr, Rmark);
3553 
3554      if (try_bias) {
3555         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3556      }
3557 
3558      // Save Rbox in Rscratch to be used for the cas operation
3559      mov(Rbox, Rscratch);
3560 
3561      // set Rmark to markOop | markOopDesc::unlocked_value
3562      or3(Rmark, markOopDesc::unlocked_value, Rmark);
3563 
3564      // Initialize the box.  (Must happen before we update the object mark!)
3565      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3566 
3567      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
3568      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3569      casx_under_lock(mark_addr.base(), Rmark, Rscratch,
3570         (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3571 
3572      // if compare/exchange succeeded we found an unlocked object and we now have locked it
3573      // hence we are done
3574      cmp(Rmark, Rscratch);
3575 #ifdef _LP64
3576      sub(Rscratch, STACK_BIAS, Rscratch);
3577 #endif
3578      brx(Assembler::equal, false, Assembler::pt, done);
3579      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
3580 
3581      // we did not find an unlocked object so see if this is a recursive case
3582      // sub(Rscratch, SP, Rscratch);
3583      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3584      andcc(Rscratch, 0xfffff003, Rscratch);
3585      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3586      bind (done) ;
3587      return ;
3588    }
3589 
3590    Label Egress ;
3591 
3592    if (EmitSync & 256) {
3593       Label IsInflated ;
3594 
3595       ld_ptr (mark_addr, Rmark);           // fetch obj->mark
3596       // Triage: biased, stack-locked, neutral, inflated
3597       if (try_bias) {
3598         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3599         // Invariant: if control reaches this point in the emitted stream
3600         // then Rmark has not been modified.
3601       }
3602 
3603       // Store mark into displaced mark field in the on-stack basic-lock "box"
3604       // Critically, this must happen before the CAS
3605       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
3606       st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3607       andcc  (Rmark, 2, G0) ;
3608       brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
3609       delayed() ->
3610 
3611       // Try stack-lock acquisition.
3612       // Beware: the 1st instruction is in a delay slot
3613       mov    (Rbox,  Rscratch);
3614       or3    (Rmark, markOopDesc::unlocked_value, Rmark);
3615       assert (mark_addr.disp() == 0, "cas must take a zero displacement");
3616       casn   (mark_addr.base(), Rmark, Rscratch) ;
3617       cmp    (Rmark, Rscratch);
3618       brx    (Assembler::equal, false, Assembler::pt, done);
3619       delayed()->sub(Rscratch, SP, Rscratch);
3620 
3621       // Stack-lock attempt failed - check for recursive stack-lock.
3622       // See the comments below about how we might remove this case.
3623 #ifdef _LP64
3624       sub    (Rscratch, STACK_BIAS, Rscratch);
3625 #endif
3626       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3627       andcc  (Rscratch, 0xfffff003, Rscratch);
3628       br     (Assembler::always, false, Assembler::pt, done) ;
3629       delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3630 
3631       bind   (IsInflated) ;
3632       if (EmitSync & 64) {
3633          // If m->owner != null goto IsLocked
3634          // Pessimistic form: Test-and-CAS vs CAS
3635          // The optimistic form avoids RTS->RTO cache line upgrades.
3636          ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3637          andcc  (Rscratch, Rscratch, G0) ;
3638          brx    (Assembler::notZero, false, Assembler::pn, done) ;
3639          delayed()->nop() ;
3640          // m->owner == null : it's unlocked.
3641       }
3642 
3643       // Try to CAS m->owner from null to Self
3644       // Invariant: if we acquire the lock then _recursions should be 0.
3645       add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
3646       mov    (G2_thread, Rscratch) ;
3647       casn   (Rmark, G0, Rscratch) ;
3648       cmp    (Rscratch, G0) ;
3649       // Intentional fall-through into done
3650    } else {
3651       // Aggressively avoid the Store-before-CAS penalty
3652       // Defer the store into box->dhw until after the CAS
3653       Label IsInflated, Recursive ;
3654 
3655 // Anticipate CAS -- Avoid RTS->RTO upgrade
3656 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
3657 
3658       ld_ptr (mark_addr, Rmark);           // fetch obj->mark
3659       // Triage: biased, stack-locked, neutral, inflated
3660 
3661       if (try_bias) {
3662         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3663         // Invariant: if control reaches this point in the emitted stream
3664         // then Rmark has not been modified.
3665       }
3666       andcc  (Rmark, 2, G0) ;
3667       brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
3668       delayed()->                         // Beware - dangling delay-slot
3669 
3670       // Try stack-lock acquisition.
3671       // Transiently install BUSY (0) encoding in the mark word.
3672       // if the CAS of 0 into the mark was successful then we execute:
3673       //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
3674       //   ST obj->mark = box    -- overwrite transient 0 value
3675       // This presumes TSO, of course.
3676 
3677       mov    (0, Rscratch) ;
3678       or3    (Rmark, markOopDesc::unlocked_value, Rmark);
3679       assert (mark_addr.disp() == 0, "cas must take a zero displacement");
3680       casn   (mark_addr.base(), Rmark, Rscratch) ;
3681 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
3682       cmp    (Rscratch, Rmark) ;
3683       brx    (Assembler::notZero, false, Assembler::pn, Recursive) ;
3684       delayed() ->
3685         st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3686       if (counters != NULL) {
3687         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3688       }
3689       br     (Assembler::always, false, Assembler::pt, done);
3690       delayed() ->
3691         st_ptr (Rbox, mark_addr) ;
3692 
3693       bind   (Recursive) ;
3694       // Stack-lock attempt failed - check for recursive stack-lock.
3695       // Tests show that we can remove the recursive case with no impact
3696       // on refworkload 0.83.  If we need to reduce the size of the code
 3697       // emitted by compiler_lock_object(), the recursive case is a perfect
 3698       // candidate.
3699       //
3700       // A more extreme idea is to always inflate on stack-lock recursion.
3701       // This lets us eliminate the recursive checks in compiler_lock_object
3702       // and compiler_unlock_object and the (box->dhw == 0) encoding.
 3703       // A brief experiment - requiring changes to synchronizer.cpp and the
 3704       // interpreter - showed a performance *increase*.  In the same experiment I eliminated
3705       // the fast-path stack-lock code from the interpreter and always passed
3706       // control to the "slow" operators in synchronizer.cpp.
3707 
3708       // RScratch contains the fetched obj->mark value from the failed CASN.
3709 #ifdef _LP64
3710       sub    (Rscratch, STACK_BIAS, Rscratch);
3711 #endif
3712       sub(Rscratch, SP, Rscratch);
3713       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3714       andcc  (Rscratch, 0xfffff003, Rscratch);
3715       if (counters != NULL) {
3716         // Accounting needs the Rscratch register
3717         st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3718         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3719         br     (Assembler::always, false, Assembler::pt, done) ;
3720         delayed()->nop() ;
3721       } else {
3722         br     (Assembler::always, false, Assembler::pt, done) ;
3723         delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3724       }
3725 
3726       bind   (IsInflated) ;
3727       if (EmitSync & 64) {
3728          // If m->owner != null goto IsLocked
3729          // Test-and-CAS vs CAS
3730          // Pessimistic form avoids futile (doomed) CAS attempts
3731          // The optimistic form avoids RTS->RTO cache line upgrades.
3732          ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3733          andcc  (Rscratch, Rscratch, G0) ;
3734          brx    (Assembler::notZero, false, Assembler::pn, done) ;
3735          delayed()->nop() ;
3736          // m->owner == null : it's unlocked.
3737       }
3738 
3739       // Try to CAS m->owner from null to Self
3740       // Invariant: if we acquire the lock then _recursions should be 0.
3741       add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
3742       mov    (G2_thread, Rscratch) ;
3743       casn   (Rmark, G0, Rscratch) ;
3744       cmp    (Rscratch, G0) ;
3745       // ST box->displaced_header = NonZero.
3746       // Any non-zero value suffices:
3747       //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
3748       st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
3749       // Intentional fall-through into done
3750    }
3751 
3752    bind   (done) ;
3753 }
3754 
3755 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
3756                                             Register Rbox, Register Rscratch,
3757                                             bool try_bias) {
3758    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3759 
3760    Label done ;
3761 
3762    if (EmitSync & 4) {
3763      cmp  (SP, G0) ;
3764      return ;
3765    }
3766 
3767    if (EmitSync & 8) {
3768      if (try_bias) {
3769         biased_locking_exit(mark_addr, Rscratch, done);
3770      }
3771 
3772      // Test first if it is a fast recursive unlock
3773      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3774      br_null(Rmark, false, Assembler::pt, done);
3775 
 3776      // Check if it is still a lightweight lock; this is true if we see
3777      // the stack address of the basicLock in the markOop of the object
3778      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3779      casx_under_lock(mark_addr.base(), Rbox, Rmark,
3780        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3781      ba(done, false);
3782      delayed()->cmp(Rbox, Rmark);
3783      bind (done) ;
3784      return ;
3785    }
3786 
 3787    // Beware ... If the aggregate size of the code emitted by CLO and CUO
 3788    // is too large, performance rolls abruptly off a cliff.
3789    // This could be related to inlining policies, code cache management, or
3790    // I$ effects.
3791    Label LStacked ;
3792 
3793    if (try_bias) {
3794       // TODO: eliminate redundant LDs of obj->mark
3795       biased_locking_exit(mark_addr, Rscratch, done);
3796    }
3797 
3798    ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
3799    ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3800    andcc  (Rscratch, Rscratch, G0);
3801    brx    (Assembler::zero, false, Assembler::pn, done);
3802    delayed()-> nop() ;      // consider: relocate fetch of mark, above, into this DS
3803    andcc  (Rmark, 2, G0) ;
3804    brx    (Assembler::zero, false, Assembler::pt, LStacked) ;
3805    delayed()-> nop() ;
3806 
3807    // It's inflated
3808    // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
3809    // the ST of 0 into _owner which releases the lock.  This prevents loads
3810    // and stores within the critical section from reordering (floating)
3811    // past the store that releases the lock.  But TSO is a strong memory model
3812    // and that particular flavor of barrier is a noop, so we can safely elide it.
3813    // Note that we use 1-0 locking by default for the inflated case.  We
 3814    // close the resultant (and rare) race by having contended threads in
3815    // monitorenter periodically poll _owner.
3816    ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3817    ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
3818    xor3   (Rscratch, G2_thread, Rscratch) ;
3819    orcc   (Rbox, Rscratch, Rbox) ;
3820    brx    (Assembler::notZero, false, Assembler::pn, done) ;
3821    delayed()->
3822    ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3823    ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3824    orcc   (Rbox, Rscratch, G0) ;
3825    if (EmitSync & 65536) {
3826       Label LSucc ;
3827       brx    (Assembler::notZero, false, Assembler::pn, LSucc) ;
3828       delayed()->nop() ;
3829       ba     (done, false) ;
3830       delayed()->
3831       st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3832 
3833       bind   (LSucc) ;
3834       st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3835       if (os::is_MP()) { membar (StoreLoad) ; }
3836       ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3837       andcc  (Rscratch, Rscratch, G0) ;
3838       brx    (Assembler::notZero, false, Assembler::pt, done) ;
3839       delayed()-> andcc (G0, G0, G0) ;
3840       add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
3841       mov    (G2_thread, Rscratch) ;
3842       casn   (Rmark, G0, Rscratch) ;
3843       // invert icc.zf and goto done
3844       br_notnull(Rscratch, false, Assembler::pt, done, false) ;
3845       delayed() -> cmp (G0, G0) ;
3846       ba     (done, false);
3847       delayed() -> cmp (G0, 1) ;
3848    } else {
3849       brx    (Assembler::notZero, false, Assembler::pn, done) ;
3850       delayed()->nop() ;
3851       ba     (done, false) ;
3852       delayed()->
3853       st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3854    }
3855 
3856    bind   (LStacked) ;
3857    // Consider: we could replace the expensive CAS in the exit
3858    // path with a simple ST of the displaced mark value fetched from
3859    // the on-stack basiclock box.  That admits a race where a thread T2
3860    // in the slow lock path -- inflating with monitor M -- could race a
3861    // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3862    // More precisely T1 in the stack-lock unlock path could "stomp" the
3863    // inflated mark value M installed by T2, resulting in an orphan
3864    // object monitor M and T2 becoming stranded.  We can remedy that situation
3865    // by having T2 periodically poll the object's mark word using timed wait
3866    // operations.  If T2 discovers that a stomp has occurred it vacates
3867    // the monitor M and wakes any other threads stranded on the now-orphan M.
3868    // In addition the monitor scavenger, which performs deflation,
 3869    // would also need to check for orphan monitors and stranded threads.
3870    //
3871    // Finally, inflation is also used when T2 needs to assign a hashCode
3872    // to O and O is stack-locked by T1.  The "stomp" race could cause
3873    // an assigned hashCode value to be lost.  We can avoid that condition
3874    // and provide the necessary hashCode stability invariants by ensuring
3875    // that hashCode generation is idempotent between copying GCs.
3876    // For example we could compute the hashCode of an object O as
3877    // O's heap address XOR some high quality RNG value that is refreshed
3878    // at GC-time.  The monitor scavenger would install the hashCode
3879    // found in any orphan monitors.  Again, the mechanism admits a
3880    // lost-update "stomp" WAW race but detects and recovers as needed.
3881    //
3882    // A prototype implementation showed excellent results, although
3883    // the scavenger and timeout code was rather involved.
3884 
3885    casn   (mark_addr.base(), Rbox, Rscratch) ;
3886    cmp    (Rbox, Rscratch);
3887    // Intentional fall through into done ...
3888 
3889    bind   (done) ;
3890 }
3891 
3892 
3893 
3894 void MacroAssembler::print_CPU_state() {
3895   // %%%%% need to implement this
3896 }
3897 
3898 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3899   // %%%%% need to implement this
3900 }
3901 
3902 void MacroAssembler::push_IU_state() {
3903   // %%%%% need to implement this
3904 }
3905 
3906 
3907 void MacroAssembler::pop_IU_state() {
3908   // %%%%% need to implement this
3909 }


3925 
3926 
3927 void MacroAssembler::pop_CPU_state() {
3928   // %%%%% need to implement this
3929 }
3930 
3931 
3932 
3933 void MacroAssembler::verify_tlab() {
3934 #ifdef ASSERT
3935   if (UseTLAB && VerifyOops) {
3936     Label next, next2, ok;
3937     Register t1 = L0;
3938     Register t2 = L1;
3939     Register t3 = L2;
3940 
3941     save_frame(0);
3942     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3943     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
3944     or3(t1, t2, t3);
3945     cmp_and_br(t1, t2, Assembler::greaterEqual, false, Assembler::pn, next);
3946     stop("assert(top >= start)");
3947     should_not_reach_here();
3948 
3949     bind(next);
3950     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3951     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
3952     or3(t3, t2, t3);
3953     cmp_and_br(t1, t2, Assembler::lessEqual, false, Assembler::pn, next2);
3954     stop("assert(top <= end)");
3955     should_not_reach_here();
3956 
3957     bind(next2);
3958     and3(t3, MinObjAlignmentInBytesMask, t3);
3959     cmp_and_br(t3, 0, Assembler::lessEqual, false, Assembler::pn, ok);
3960     stop("assert(aligned)");
3961     should_not_reach_here();
3962 
3963     bind(ok);
3964     restore();
3965   }
3966 #endif
3967 }
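
The three stops above encode the TLAB invariants start <= top, top <= end, and object alignment of all three pointers (the or3 calls accumulate start|top|end into t3 before the mask test). A standalone restatement of those invariants, with illustrative values and an 8-byte alignment mask standing in for MinObjAlignmentInBytesMask:

#include <assert.h>
#include <stdint.h>

static void verify_tlab_invariants(uintptr_t start, uintptr_t top,
                                   uintptr_t end, uintptr_t align_mask) {
  assert(top >= start);                                // "assert(top >= start)"
  assert(top <= end);                                  // "assert(top <= end)"
  assert(((start | top | end) & align_mask) == 0);     // "assert(aligned)"
}

int main(void) {
  verify_tlab_invariants(0x1000, 0x1040, 0x2000, 7);   // 8-byte alignment mask
  return 0;
}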
3968 
3969 
3970 void MacroAssembler::eden_allocate(
3971   Register obj,                        // result: pointer to object after successful allocation
3972   Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
3973   int      con_size_in_bytes,          // object size in bytes if   known at compile time
3974   Register t1,                         // temp register
3975   Register t2,                         // temp register
3976   Label&   slow_case                   // continuation point if fast allocation fails
3977 ){
3978   // make sure arguments make sense
3979   assert_different_registers(obj, var_size_in_bytes, t1, t2);
3980   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
3981   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
3982 
3983   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
3984     // No allocation in the shared eden.
3985     ba(slow_case);
3986   } else {
3987     // get eden boundaries
3988     // note: we need both top & top_addr!
3989     const Register top_addr = t1;
3990     const Register end      = t2;
3991 
3992     CollectedHeap* ch = Universe::heap();
3993     set((intx)ch->top_addr(), top_addr);
3994     intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
3995     ld_ptr(top_addr, delta, end);
3996     ld_ptr(top_addr, 0, obj);
3997 
3998     // try to allocate
3999     Label retry;
4000     bind(retry);
4001 #ifdef ASSERT
4002     // make sure eden top is properly aligned
4003     {
4004       Label L;
4005       btst(MinObjAlignmentInBytesMask, obj);


4099     bind(L);
4100   }
4101 #endif // ASSERT
4102 
4103   // update the tlab top pointer
4104   st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
4105   verify_tlab();
4106 }
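
The fast path that ends here is a plain bump-pointer allocation against the thread's TLAB; in C++ terms it amounts to the following conceptual sketch (not the emitted code, which works on registers and branches to a slow case instead of returning NULL):

  HeapWord* tlab_allocate_sketch(Thread* thread, size_t size_in_words) {
    HeapWord* obj  = thread->tlab().top();
    HeapWord* free = obj + size_in_words;
    if (free > thread->tlab().end()) return NULL;   // take the slow case
    thread->tlab().set_top(free);                   // "update the tlab top pointer"
    return obj;
  }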
4107 
4108 
4109 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
4110   Register top = O0;
4111   Register t1 = G1;
4112   Register t2 = G3;
4113   Register t3 = O1;
4114   assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
4115   Label do_refill, discard_tlab;
4116 
4117   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
4118     // No allocation in the shared eden.
4119     ba(slow_case);
4120   }
4121 
4122   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
4123   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
4124   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
4125 
4126   // calculate amount of free space
4127   sub(t1, top, t1);
4128   srl_ptr(t1, LogHeapWordSize, t1);
4129 
4130   // Retain tlab and allocate object in shared space if
4131   // the amount free in the tlab is too large to discard.
4132   cmp(t1, t2);
4133   brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);
4134 
4135   // increment waste limit to prevent getting stuck on this slow path
4136   delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
4137   st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
4138   if (TLABStats) {
4139     // increment number of slow_allocations
4140     ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
4141     add(t2, 1, t2);
4142     stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
4143   }
4144   ba(try_eden);
4145 
4146   bind(discard_tlab);
4147   if (TLABStats) {
4148     // increment number of refills
4149     ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
4150     add(t2, 1, t2);
4151     stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
4152     // accumulate wastage
4153     ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
4154     add(t2, t1, t2);
4155     stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
4156   }
4157 
4158   // if tlab is currently allocated (top or end != null) then
4159   // fill [top, end + alignment_reserve) with array object
4160   br_null(top, false, Assembler::pn, do_refill);
4161 
4162   set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
4163   st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
4164   // set klass to intArrayKlass
4165   sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
4166   add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
4167   sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
4168   st(t1, top, arrayOopDesc::length_offset_in_bytes());
4169   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
4170   ld_ptr(t2, 0, t2);
4171   // store klass last.  concurrent gcs assume klass length is valid if
4172   // klass field is not null.
4173   store_klass(t2, top);
4174   verify_oop(top);
4175 
4176   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
4177   sub(top, t1, t1); // size of tlab's allocated portion
4178   incr_allocated_bytes(t1, t2, t3);
4179 
4180   // refill the tlab with an eden allocation
4181   bind(do_refill);
4182   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
4183   sll_ptr(t1, LogHeapWordSize, t1);
4184   // allocate new tlab, address returned in top
4185   eden_allocate(top, t1, 0, t2, t3, slow_case);
4186 
4187   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
4188   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
4189 #ifdef ASSERT
4190   // check that tlab_size (t1) is still valid
4191   {
4192     Label ok;
4193     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
4194     sll_ptr(t2, LogHeapWordSize, t2);
4195     cmp_and_br(t1, t2, Assembler::equal, false, Assembler::pn, ok);
4196     stop("assert(t1 == tlab_size)");
4197     should_not_reach_here();
4198 
4199     bind(ok);
4200   }
4201 #endif // ASSERT
4202   add(top, t1, top); // t1 is tlab_size
4203   sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
4204   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
4205   verify_tlab();
4206   ba(retry);
4207 }
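
The policy implemented above - retain the TLAB and allocate in eden when discarding it would waste too much space, otherwise fill its unused tail with a dummy int[] and refill it from eden - reduces to a single comparison; a rough C++ sketch (illustrative only):

  // free_words: space left in the TLAB; limit_words: the refill_waste_limit.
  bool discard_tlab(size_t free_words, size_t limit_words) {
    if (free_words > limit_words) {
      // Too much would be wasted: keep the TLAB, bump the waste limit so this
      // slow path is not taken repeatedly, and allocate this object in eden.
      return false;
    }
    // Otherwise overwrite [top, end + alignment_reserve) with a filler int[]
    // (keeping the heap parsable for concurrent GC) and grab a new TLAB.
    return true;
  }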
4208 
4209 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
4210                                           Register t1, Register t2) {
4211   // Bump total bytes allocated by this thread
4212   assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
4213   assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
4214   // v8 support has gone the way of the dodo
4215   ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
4216   add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
4217   stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
4218 }
4219 
4220 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
4221   switch (cond) {
4222     // Note some conditions are synonyms for others
4223     case Assembler::never:                return Assembler::always;
4224     case Assembler::zero:                 return Assembler::notZero;
4225     case Assembler::lessEqual:            return Assembler::greater;
4226     case Assembler::less:                 return Assembler::greaterEqual;


4337   Label refill, restart;
4338   if (with_frame) {
4339     __ save_frame(0);
4340     pre_val = I0;  // Was O0 before the save.
4341   } else {
4342     pre_val = O0;
4343   }
4344   int satb_q_index_byte_offset =
4345     in_bytes(JavaThread::satb_mark_queue_offset() +
4346              PtrQueue::byte_offset_of_index());
4347   int satb_q_buf_byte_offset =
4348     in_bytes(JavaThread::satb_mark_queue_offset() +
4349              PtrQueue::byte_offset_of_buf());
4350   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
4351          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
4352          "check sizes in assembly below");
4353 
4354   __ bind(restart);
4355   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
4356 
4357   __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill, false);
4358   // If the branch is taken, no harm in executing this in the delay slot.
4359   __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
4360   __ sub(L0, oopSize, L0);
4361 
4362   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
4363   if (!with_frame) {
4364     // Use return-from-leaf
4365     __ retl();
4366     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4367   } else {
4368     // Not delayed.
4369     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4370   }
4371   if (with_frame) {
4372     __ ret();
4373     __ delayed()->restore();
4374   }
4375   __ bind(refill);
4376 
4377   address handle_zero =


4452     assert(pre_val == noreg, "check this code");
4453   }
4454 
4455   // Is marking active?
4456   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4457     ld(G2,
4458        in_bytes(JavaThread::satb_mark_queue_offset() +
4459                 PtrQueue::byte_offset_of_active()),
4460        tmp);
4461   } else {
4462     guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
4463               "Assumption");
4464     ldsb(G2,
4465          in_bytes(JavaThread::satb_mark_queue_offset() +
4466                   PtrQueue::byte_offset_of_active()),
4467          tmp);
4468   }
4469 
4470   // Check on whether to annul.
4471   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);

4472 
4473   // Do we need to load the previous value?
4474   if (obj != noreg) {
4475     // Load the previous value...
4476     if (index == noreg) {
4477       if (Assembler::is_simm13(offset)) {
4478         load_heap_oop(obj, offset, tmp);
4479       } else {
4480         set(offset, tmp);
4481         load_heap_oop(obj, tmp, tmp);
4482       }
4483     } else {
4484       load_heap_oop(obj, index, tmp);
4485     }
4486     // Previous value has been loaded into tmp
4487     pre_val = tmp;
4488   }
4489 
4490   assert(pre_val != noreg, "must have a real register");
4491 
4492   // Is the previous value null?
4493   // Check on whether to annul.
4494   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);

4495 
4496   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
4497   // case, pre_val will be a scratch G-reg, but there are some cases in
4498   // which it's an O-reg.  In the first case, do a normal call.  In the
4499   // latter, do a save here and call the frameless version.
4500 
4501   guarantee(pre_val->is_global() || pre_val->is_out(),
4502             "Or we need to think harder.");
4503 
4504   if (pre_val->is_global() && !preserve_o_regs) {
4505     generate_satb_log_enqueue_if_necessary(true); // with frame
4506 
4507     call(satb_log_enqueue_with_frame);
4508     delayed()->mov(pre_val, O0);
4509   } else {
4510     generate_satb_log_enqueue_if_necessary(false); // frameless
4511 
4512     save_frame(0);
4513     call(satb_log_enqueue_frameless);
4514     delayed()->mov(pre_val->after_save(), O0);




  89 const char* Argument::name() const {
  90   int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
  91   int num = number();
  92   if (num >= nofArgs)  num = nofArgs - 1;
  93   return argumentNames[num][is_in() ? 1 : 0];
  94 }
  95 
  96 void Assembler::print_instruction(int inst) {
  97   const char* s;
  98   switch (inv_op(inst)) {
  99   default:         s = "????"; break;
 100   case call_op:    s = "call"; break;
 101   case branch_op:
 102     switch (inv_op2(inst)) {
 103       case fb_op2:     s = "fb";   break;
 104       case fbp_op2:    s = "fbp";  break;
 105       case br_op2:     s = "br";   break;
 106       case bp_op2:     s = "bp";   break;
 107       case cb_op2:     s = "cb";   break;
 108       case bpr_op2: {
 109         if (is_cbcond(inst)) {
 110           s = is_cxb(inst) ? "cxb" : "cwb";
 111         } else {
 112           s = "bpr";
 113         }
 114         break;
 115       }
 116       default:         s = "????"; break;
 117     }
 118   }
 119   ::tty->print("%s", s);
 120 }
 121 
 122 
 123 // Patch instruction inst at offset inst_pos to refer to dest_pos
 124 // and return the resulting instruction.
 125 // We should have pcs, not offsets, but since all is relative, it will work out
 126 // OK.
 127 int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
 128 
 129   int m; // mask for displacement field
 130   int v; // new value for displacement field
 131   const int word_aligned_ones = -4;
 132   switch (inv_op(inst)) {
 133   default: ShouldNotReachHere();
 134   case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
 135   case branch_op:
 136     switch (inv_op2(inst)) {
 137       case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 138       case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 139       case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 140       case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 141       case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 142       case bpr_op2: {
 143         if (is_cbcond(inst)) {
 144           m = wdisp10(word_aligned_ones, 0);
 145           v = wdisp10(dest_pos, inst_pos);
 146         } else {
 147           m = wdisp16(word_aligned_ones, 0);
 148           v = wdisp16(dest_pos, inst_pos);
 149         }
 150         break;
 151       }
 152       default: ShouldNotReachHere();
 153     }
 154   }
 155   return  inst & ~m  |  v;
 156 }
 157 
 158 // Return the offset of the branch destination of instruction inst
 159 // at offset pos.
 160 // Should have pcs, but since all is relative, it works out.
 161 int Assembler::branch_destination(int inst, int pos) {
 162   int r;
 163   switch (inv_op(inst)) {
 164   default: ShouldNotReachHere();
 165   case call_op:        r = inv_wdisp(inst, pos, 30);  break;
 166   case branch_op:
 167     switch (inv_op2(inst)) {
 168       case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
 169       case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
 170       case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 171       case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 172       case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 173       case bpr_op2: {
 174         if (is_cbcond(inst)) {
 175           r = inv_wdisp10(inst, pos);
 176         } else {
 177           r = inv_wdisp16(inst, pos);
 178         }
 179         break;
 180       }
 181       default: ShouldNotReachHere();
 182     }
 183   }
 184   return r;
 185 }
 186 
 187 int AbstractAssembler::code_fill_byte() {
 188   return 0x00;                  // illegal instruction 0x00000000
 189 }
 190 
 191 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
 192   switch (in) {
 193   case rc_z:   return equal;
 194   case rc_lez: return lessEqual;


 974   }
 975 }
 976 
 977 
 978 // %%% maybe get rid of [re]set_last_Java_frame
 979 void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
 980   assert_not_delayed();
 981   Address flags(G2_thread, JavaThread::frame_anchor_offset() +
 982                            JavaFrameAnchor::flags_offset());
 983   Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
 984 
 985   // Always set last_Java_pc and flags first because once last_Java_sp is visible,
 986   // has_last_Java_frame is true and users will look at the rest of the fields.
 987   // (Note: flags should always be zero before we get here, so it doesn't need to be set.)
 988 
 989 #ifdef ASSERT
 990   // Verify that last_Java_pc was zeroed on return to Java
 991   Label PcOk;
 992   save_frame(0);                // to avoid clobbering O0
 993   ld_ptr(pc_addr, L0);
 994   br_null_short(L0, Assembler::pt, PcOk);
 995   stop("last_Java_pc not zeroed before leaving Java");
 996   bind(PcOk);
 997 
 998   // Verify that flags was zeroed on return to Java
 999   Label FlagsOk;
1000   ld(flags, L0);
1001   tst(L0);
1002   br(Assembler::zero, false, Assembler::pt, FlagsOk);
1003   delayed() -> restore();
1004   stop("flags not zeroed before leaving Java");
1005   bind(FlagsOk);
1006 #endif /* ASSERT */
1007   //
1008   // When returning from calling out from Java mode the frame anchor's last_Java_pc
1009   // will always be set to NULL. It is set here so that if we are doing a call to
1010   // native (not VM) we capture the known pc and don't have to rely on the
1011   // native call having a standard frame linkage where we can find the pc.
1012 
1013   if (last_Java_pc->is_valid()) {
1014     st_ptr(last_Java_pc, pc_addr);


1099   set(badHeapWordVal, G3);
1100   set(badHeapWordVal, G4);
1101   set(badHeapWordVal, G5);
1102 #endif
1103 
1104   // get oop result if there is one and reset the value in the thread
1105   if (oop_result->is_valid()) {
1106     get_vm_result(oop_result);
1107   }
1108 }
1109 
1110 void MacroAssembler::check_and_forward_exception(Register scratch_reg)
1111 {
1112   Label L;
1113 
1114   check_and_handle_popframe(scratch_reg);
1115   check_and_handle_earlyret(scratch_reg);
1116 
1117   Address exception_addr(G2_thread, Thread::pending_exception_offset());
1118   ld_ptr(exception_addr, scratch_reg);
1119   br_null_short(scratch_reg, pt, L);
1120   // we use O7 linkage so that forward_exception_entry has the issuing PC
1121   call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1122   delayed()->nop();
1123   bind(L);
1124 }
1125 
1126 
1127 void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
1128 }
1129 
1130 
1131 void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
1132 }
1133 
1134 
1135 void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1136   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
1137 }
1138 
1139 


1873     Register O2_adr   = O2;
1874     Register O3_accum = O3;
1875     inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
1876   }
1877 
1878   Register O2_mask = O2;
1879   Register O3_bits = O3;
1880   Register O4_temp = O4;
1881 
1882   // mark lower end of faulting range
1883   assert(_verify_oop_implicit_branch[0] == NULL, "set once");
1884   _verify_oop_implicit_branch[0] = pc();
1885 
1886   // We can't check the mark oop because it could be in the process of
1887   // locking or unlocking while this is running.
1888   set(Universe::verify_oop_mask (), O2_mask);
1889   set(Universe::verify_oop_bits (), O3_bits);
1890 
1891   // assert((obj & oop_mask) == oop_bits);
1892   and3(O0_obj, O2_mask, O4_temp);
1893   cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
1894 
1895   if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
1896     // the null_or_fail case is useless; must test for null separately
1897     br_null_short(O0_obj, pn, succeed);
1898   }
1899 
1900   // Check the klassOop of this object for being in the right area of memory.
1901   // Cannot do the load in the delay slot above in case O0 is null
1902   load_klass(O0_obj, O0_obj);
1903   // assert((klass & klass_mask) == klass_bits);
1904   if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
1905     set(Universe::verify_klass_mask(), O2_mask);
1906   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
1907     set(Universe::verify_klass_bits(), O3_bits);
1908   and3(O0_obj, O2_mask, O4_temp);
1909   cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
1910   // Check the klass's klass
1911   load_klass(O0_obj, O0_obj);
1912   and3(O0_obj, O2_mask, O4_temp);
1913   cmp(O4_temp, O3_bits);
1914   brx(notEqual, false, pn, fail);
1915   delayed()->wrccr( O5_save_flags ); // Restore CCR's
1916 
1917   // mark upper end of faulting range
1918   _verify_oop_implicit_branch[1] = pc();
1919 
1920   //-----------------------
1921   // all tests pass
1922   bind(succeed);
1923 
1924   // Restore prior 64-bit registers
1925   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
1926   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
1927   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
1928   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
1929   ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);


2117 }
2118 
2119 
2120 // ---------------------------------------------------------
2121 Assembler::RCondition cond2rcond(Assembler::Condition c) {
2122   switch (c) {
2123     /*case zero: */
2124     case Assembler::equal:        return Assembler::rc_z;
2125     case Assembler::lessEqual:    return Assembler::rc_lez;
2126     case Assembler::less:         return Assembler::rc_lz;
2127     /*case notZero:*/
2128     case Assembler::notEqual:     return Assembler::rc_nz;
2129     case Assembler::greater:      return Assembler::rc_gz;
2130     case Assembler::greaterEqual: return Assembler::rc_gez;
2131   }
2132   ShouldNotReachHere();
2133   return Assembler::rc_z;
2134 }
2135 
2136 // compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
2137 void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {




2138   tst(s1);
2139   br (c, a, p, L);


2140 }
2141 
2142 // Compares a pointer register with zero and branches on null.
2143 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
2144 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
2145   assert_not_delayed();
2146   if (emit_delayed_nop && use_cbc(L)) {
2147     Assembler::cbc(zero, ptr_cc, s1, 0, L);
2148     return;
2149   }
2150 #ifdef _LP64
2151   bpr( rc_z, a, p, s1, L );
2152 #else
2153   tst(s1);
2154   br ( zero, a, p, L );
2155 #endif
2156   // Some callers can fill the delay slot.
2157   if (emit_delayed_nop) {
2158     delayed()->nop();
2159   }


2205   }
2206   // Some callers can fill the delay slot.
2207   if (emit_delayed_nop) {
2208     delayed()->nop();
2209   }
2210 }
2211 
2212 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
2213 void MacroAssembler::cmp_and_br(Register s1, Register s2, Condition c,
2214                                 bool a, Predict p, Label& L) {
2215   assert_not_delayed();
2216   if (use_cbc(L)) {
2217     Assembler::cbc(c, icc, s1, s2, L);
2218   } else {
2219     cmp(s1, s2);
2220     br(c, a, p, L);
2221     delayed()->nop();
2222   }
2223 }
2224 
2225 // Compare integer (32 bit) values (icc only).
2226 void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
2227                                       Predict p, Label& L) {
2228   assert_not_delayed();
2229   if (use_cbcond(L)) {
2230     Assembler::cbcond(c, icc, s1, s2, L);
2231   } else {
2232     cmp(s1, s2);
2233     br(c, false, p, L);
2234     delayed()->nop();
2235   }
2236 }
2237 
2238 // Compare integer (32 bit) values (icc only).
2239 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
2240                                       Predict p, Label& L) {
2241   assert_not_delayed();
2242   if (is_simm(simm13a,5) && use_cbcond(L)) {
2243     Assembler::cbcond(c, icc, s1, simm13a, L);
2244   } else {
2245     cmp(s1, simm13a);
2246     br(c, false, p, L);
2247     delayed()->nop();
2248   }
2249 }
2250 
2251 // Branch that tests xcc in LP64 and icc in !LP64
2252 void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
2253                                        Predict p, Label& L) {
2254   assert_not_delayed();
2255   if (use_cbcond(L)) {
2256     Assembler::cbcond(c, ptr_cc, s1, s2, L);
2257   } else {
2258     cmp(s1, s2);
2259     brx(c, false, p, L);
2260     delayed()->nop();
2261   }
2262 }
2263 
2264 // Branch that tests xcc in LP64 and icc in !LP64
2265 void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
2266                                        Predict p, Label& L) {
2267   assert_not_delayed();
2268   if (is_simm(simm13a,5) && use_cbcond(L)) {
2269     Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
2270   } else {
2271     cmp(s1, simm13a);
2272     brx(c, false, p, L);
2273     delayed()->nop();
2274   }
2275 }
2276 
2277 // Short branch version for comparing a pointer with zero.
2278 
2279 void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
2280   assert_not_delayed();
2281   if (use_cbcond(L)) {
2282     Assembler::cbcond(zero, ptr_cc, s1, 0, L);
2283     return;
2284   }
2285   br_null(s1, false, p, L);
2286   delayed()->nop();
2287 }
2288 
2289 void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
2290   assert_not_delayed();
2291   if (use_cbcond(L)) {
2292     Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
2293     return;
2294   }
2295   br_notnull(s1, false, p, L);
2296   delayed()->nop();
2297 }
2298 
2299 // Unconditional short branch
2300 void MacroAssembler::ba_short(Label& L) {
2301   if (use_cbcond(L)) {
2302     Assembler::cbcond(equal, icc, G0, G0, L);
2303     return;
2304   }
2305   br(always, false, pt, L);
2306   delayed()->nop();
2307 }
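
Each of these *_short helpers emits a single cbcond when the label is close enough (and, for the immediate forms, when the constant fits in a simm5); otherwise it falls back to cmp, a branch, and a nop in the delay slot. A typical call site looks like this (hypothetical register and label names):

  Label L_ok;
  __ cmp_and_br_short(Rcount, 0, Assembler::notEqual, Assembler::pt, L_ok);
  __ stop("count must not be zero");   // reached only when Rcount == 0
  __ bind(L_ok);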
2308 
2309 // instruction sequences factored across compiler & interpreter
2310 
2311 
2312 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
2313                            Register Rb_hi, Register Rb_low,
2314                            Register Rresult) {
2315 
2316   Label check_low_parts, done;
2317 
2318   cmp(Ra_hi, Rb_hi );  // compare hi parts
2319   br(equal, true, pt, check_low_parts);
2320   delayed()->cmp(Ra_low, Rb_low); // test low parts
2321 
2322   // And, with an unsigned comparison, it does not matter if the numbers
2323   // are negative or not.
2324   // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
2325   // The second one is bigger (unsignedly).
2326 
2327   // Other notes:  The first move in each triplet can be unconditional
2328   // (and therefore probably prefetchable).
2329   // And the equals case for the high part does not need testing,
2330   // since that triplet is reached only after finding the high halves differ.
2331 
2332   if (VM_Version::v9_instructions_work()) {
2333     mov(-1, Rresult);
2334     ba(done);  delayed()-> movcc(greater, false, icc,  1, Rresult);
2335   } else {
2336     br(less,    true, pt, done); delayed()-> set(-1, Rresult);
2337     br(greater, true, pt, done); delayed()-> set( 1, Rresult);
2338   }
2339 
2340   bind( check_low_parts );
2341 
2342   if (VM_Version::v9_instructions_work()) {
2343     mov(                               -1, Rresult);
2344     movcc(equal,           false, icc,  0, Rresult);
2345     movcc(greaterUnsigned, false, icc,  1, Rresult);
2346   } else {
2347     set(-1, Rresult);
2348     br(equal,           true, pt, done); delayed()->set( 0, Rresult);
2349     br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
2350   }
2351   bind( done );
2352 }
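
In plain C++ the comparison this 32-bit-register version of lcmp performs - signed on the high halves, unsigned on the low halves, producing -1/0/+1 - reads as follows (illustrative only):

  int lcmp_sketch(jint a_hi, juint a_lo, jint b_hi, juint b_lo) {
    if (a_hi != b_hi) return (a_hi < b_hi) ? -1 : 1;   // signed high halves
    if (a_lo == b_lo) return 0;
    return (a_lo < b_lo) ? -1 : 1;                     // unsigned low halves
  }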
2353 
2354 void MacroAssembler::lneg( Register Rhi, Register Rlow ) {


2389 
2390   // We get the transfer bits by shifting right by 32-count the low
2391   // register. This is done by shifting right by 31-count and then by one
2392   // more to take care of the special (rare) case where count is zero
2393   // (shifting by 32 would not work).
2394 
2395   neg(Ralt_count);
2396 
2397   // The order of the next two instructions is critical in the case where
2398   // Rin and Rout are the same and should not be reversed.
2399 
2400   srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
2401   if (Rcount != Rout_low) {
2402     sll(Rin_low, Rcount, Rout_low); // low half
2403   }
2404   sll(Rin_high, Rcount, Rout_high);
2405   if (Rcount == Rout_low) {
2406     sll(Rin_low, Rcount, Rout_low); // low half
2407   }
2408   srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
2409   ba(done);
2410   delayed()->or3(Rout_high, Rxfer_bits, Rout_high);   // new hi value: or in shifted old hi part and xfer from low
2411 
2412   // shift >= 32 bits, Ralt_count = Rcount-32
2413   bind(big_shift);
2414   sll(Rin_low, Ralt_count, Rout_high  );
2415   clr(Rout_low);
2416 
2417   bind(done);
2418 }
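
The transfer-bit trick used here (and again in lshr and lushr below) shifts by 31-count and then by one more, so a count of zero never turns into a shift by 32; for the count < 32 case of the left shift, the C++ equivalent is (illustrative only):

  void lshl_sketch(juint& hi, juint& lo, int count /* 0..31 */) {
    juint xfer = (lo >> (31 - count)) >> 1;   // bits carried from low into high
    hi = (hi << count) | xfer;
    lo = lo << count;
    // for count >= 32 the result is simply: hi = lo << (count - 32); lo = 0;
  }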
2419 
2420 
2421 void MacroAssembler::lshr( Register Rin_high,  Register Rin_low,
2422                            Register Rcount,
2423                            Register Rout_high, Register Rout_low,
2424                            Register Rtemp ) {
2425 
2426   Register Ralt_count = Rtemp;
2427   Register Rxfer_bits = Rtemp;
2428 
2429   assert( Ralt_count != Rin_high


2450 
2451   // We get the transfer bits by shifting left by 32-count the high
2452   // register. This is done by shifting left by 31-count and then by one
2453   // more to take care of the special (rare) case where count is zero
2454   // (shifting by 32 would not work).
2455 
2456   neg(Ralt_count);
2457   if (Rcount != Rout_low) {
2458     srl(Rin_low, Rcount, Rout_low);
2459   }
2460 
2461   // The order of the next two instructions is critical in the case where
2462   // Rin and Rout are the same and should not be reversed.
2463 
2464   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2465   sra(Rin_high,     Rcount, Rout_high ); // high half
2466   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
2467   if (Rcount == Rout_low) {
2468     srl(Rin_low, Rcount, Rout_low);
2469   }
2470   ba(done);
2471   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2472 
2473   // shift >= 32 bits, Ralt_count = Rcount-32
2474   bind(big_shift);
2475 
2476   sra(Rin_high, Ralt_count, Rout_low);
2477   sra(Rin_high,         31, Rout_high); // sign into hi
2478 
2479   bind( done );
2480 }
2481 
2482 
2483 
2484 void MacroAssembler::lushr( Register Rin_high,  Register Rin_low,
2485                             Register Rcount,
2486                             Register Rout_high, Register Rout_low,
2487                             Register Rtemp ) {
2488 
2489   Register Ralt_count = Rtemp;
2490   Register Rxfer_bits = Rtemp;


2513 
2514   // We get the transfer bits by shifting left by 32-count the high
2515   // register. This is done by shifting left by 31-count and then by one
2516   // more to take care of the special (rare) case where count is zero
2517   // (shifting by 32 would not work).
2518 
2519   neg(Ralt_count);
2520   if (Rcount != Rout_low) {
2521     srl(Rin_low, Rcount, Rout_low);
2522   }
2523 
2524   // The order of the next two instructions is critical in the case where
2525   // Rin and Rout are the same and should not be reversed.
2526 
2527   sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2528   srl(Rin_high,     Rcount, Rout_high ); // high half
2529   sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
2530   if (Rcount == Rout_low) {
2531     srl(Rin_low, Rcount, Rout_low);
2532   }
2533   ba(done);
2534   delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2535 
2536   // shift >= 32 bits, Ralt_count = Rcount-32
2537   bind(big_shift);
2538 
2539   srl(Rin_high, Ralt_count, Rout_low);
2540   clr(Rout_high);
2541 
2542   bind( done );
2543 }
2544 
2545 #ifdef _LP64
2546 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
2547   cmp(Ra, Rb);
2548   mov(-1, Rresult);
2549   movcc(equal,   false, xcc,  0, Rresult);
2550   movcc(greater, false, xcc,  1, Rresult);
2551 }
2552 #endif
2553 


2777     if (top_reg_after_save == L1) {
2778       ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
2779     }
2780 
2781     if (ptr_reg_after_save == L2) {
2782       ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
2783     }
2784 
2785     Label(retry_get_lock);
2786     Label(not_same);
2787     Label(dont_yield);
2788 
2789     assert(lock_addr, "lock_address should be non null for v8");
2790     set((intptr_t)lock_addr, lock_ptr_reg);
2791     // Initialize yield counter
2792     mov(G0,yield_reg);
2793     mov(G0, yieldall_reg);
2794     set(StubRoutines::Sparc::locked, lock_reg);
2795 
2796     bind(retry_get_lock);
2797     cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
2798 
2799     if(use_call_vm) {
2800       Untested("Need to verify global reg consistency");
2801       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
2802     } else {
2803       // Save the regs and make space for a C call
2804       save(SP, -96, SP);
2805       save_all_globals_into_locals();
2806       call(CAST_FROM_FN_PTR(address,os::yield_all));
2807       delayed()->mov(yieldall_reg, O0);
2808       restore_globals_from_locals();
2809       restore();
2810     }
2811 
2812     // reset the counter
2813     mov(G0,yield_reg);
2814     add(yieldall_reg, 1, yieldall_reg);
2815 
2816     bind(dont_yield);
2817     // try to get lock
2818     swap(lock_ptr_reg, 0, lock_reg);
2819 
2820     // did we get the lock?
2821     cmp(lock_reg, StubRoutines::Sparc::unlocked);
2822     br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
2823     delayed()->add(yield_reg,1,yield_reg);
2824 
2825     // yes, got lock.  do we have the same top?
2826     ld(top_ptr_reg_after_save, 0, value_reg);
2827     cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
2828 
2829     // yes, same top.
2830     st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
2831     membar(Assembler::StoreStore);
2832 
2833     bind(not_same);
2834     mov(value_reg, ptr_reg_after_save);
2835     st(lock_reg, lock_ptr_reg, 0); // unlock
2836 
2837     restore();
2838   }
2839 }
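
Stripped of the register shuffling, the acquisition loop above spins on an atomic swap and yields the CPU after V8AtomicOperationUnderLockSpinCount consecutive failures; a conceptual C++ sketch, with placeholder lock values and a GCC builtin standing in for the swap instruction:

  void acquire_v8_lock_sketch(volatile int* lock_addr, int spin_limit) {
    int spins = 0;
    for (;;) {
      if (spins >= spin_limit) {   // spun too long: yield (os::yield_all in the real code)
        spins = 0;
      }
      int prev = __sync_lock_test_and_set(lock_addr, 1 /* locked */);
      if (prev == 0 /* unlocked */) return;   // got the lock
      ++spins;
    }
  }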
2840 
2841 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2842                                                       Register tmp,
2843                                                       int offset) {
2844   intptr_t value = *delayed_value_addr;
2845   if (value != 0)
2846     return RegisterOrConstant(value + offset);
2847 


3057                                          Register super_klass,
3058                                          Register temp_reg,
3059                                          Register temp2_reg,
3060                                          Label& L_success) {
3061   Label L_failure, L_pop_to_failure;
3062   check_klass_subtype_fast_path(sub_klass, super_klass,
3063                                 temp_reg, temp2_reg,
3064                                 &L_success, &L_failure, NULL);
3065   Register sub_2 = sub_klass;
3066   Register sup_2 = super_klass;
3067   if (!sub_2->is_global())  sub_2 = L0;
3068   if (!sup_2->is_global())  sup_2 = L1;
3069 
3070   save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
3071   check_klass_subtype_slow_path(sub_2, sup_2,
3072                                 L2, L3, L4, L5,
3073                                 NULL, &L_pop_to_failure);
3074 
3075   // on success:
3076   restore();
3077   ba_short(L_success);
3078 
3079   // on failure:
3080   bind(L_pop_to_failure);
3081   restore();
3082   bind(L_failure);
3083 }
3084 
3085 
3086 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3087                                                    Register super_klass,
3088                                                    Register temp_reg,
3089                                                    Register temp2_reg,
3090                                                    Label* L_success,
3091                                                    Label* L_failure,
3092                                                    Label* L_slow_path,
3093                                         RegisterOrConstant super_check_offset) {
3094   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
3095                    Klass::secondary_super_cache_offset_in_bytes());
3096   int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
3097                     Klass::super_check_offset_offset_in_bytes());


3136     // super_check_offset is register.
3137     assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
3138   }
3139   ld_ptr(sub_klass, super_check_offset, temp_reg);
3140   cmp(super_klass, temp_reg);
3141 
3142   // This check has worked decisively for primary supers.
3143   // Secondary supers are sought in the super_cache ('super_cache_addr').
3144   // (Secondary supers are interfaces and very deeply nested subtypes.)
3145   // This works in the same check above because of a tricky aliasing
3146   // between the super_cache and the primary super display elements.
3147   // (The 'super_check_addr' can address either, as the case requires.)
3148   // Note that the cache is updated below if it does not help us find
3149   // what we need immediately.
3150   // So if it was a primary super, we can just fail immediately.
3151   // Otherwise, it's the slow path for us (no success at this point).
3152 
3153   // Hacked ba(), which may only be used just before L_fallthrough.
3154 #define FINAL_JUMP(label)            \
3155   if (&(label) != &L_fallthrough) {  \
3156     ba(label);  delayed()->nop();    \

3157   }
3158 
3159   if (super_check_offset.is_register()) {
3160     brx(Assembler::equal, false, Assembler::pn, *L_success);
3161     delayed()->cmp(super_check_offset.as_register(), sc_offset);
3162 
3163     if (L_failure == &L_fallthrough) {
3164       brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
3165       delayed()->nop();
3166     } else {
3167       brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
3168       delayed()->nop();
3169       FINAL_JUMP(*L_slow_path);
3170     }
3171   } else if (super_check_offset.as_constant() == sc_offset) {
3172     // Need a slow path; fast failure is impossible.
3173     if (L_slow_path == &L_fallthrough) {
3174       brx(Assembler::equal, false, Assembler::pt, *L_success);
3175       delayed()->nop();
3176     } else {


3268     // Don't use load_heap_oop; we don't want to decode the element.
3269     lduw(   scan_temp, elem_offset, scratch_reg );
3270   } else {
3271     ld_ptr( scan_temp, elem_offset, scratch_reg );
3272   }
3273 
3274   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
3275   cmp(scratch_reg, search_key);
3276 
3277   // A miss means we are NOT a subtype and need to keep looping
3278   brx(Assembler::notEqual, false, Assembler::pn, L_loop);
3279   delayed()->deccc(count_temp); // decrement trip counter in delay slot
3280 
3281   // Falling out the bottom means we found a hit; we ARE a subtype
3282   if (decode_super_klass) decode_heap_oop(super_klass);
3283 
3284   // Success.  Cache the super we found and proceed in triumph.
3285   st_ptr(super_klass, sub_klass, sc_offset);
3286 
3287   if (L_success != &L_fallthrough) {
3288     ba(*L_success);
3289     delayed()->nop();
3290   }
3291 
3292   bind(L_fallthrough);
3293 }
3294 
3295 
3296 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
3297                                               Register temp_reg,
3298                                               Label& wrong_method_type) {
3299   assert_different_registers(mtype_reg, mh_reg, temp_reg);
3300   // compare method type against that of the receiver
3301   RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
3302   load_heap_oop(mh_reg, mhtype_offset, temp_reg);
3303   cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
3304 }
3305 
3306 
3307 // A method handle has a "vmslots" field which gives the size of its
3308 // argument list in JVM stack slots.  This field is either located directly
3309 // in every method handle, or else is indirectly accessed through the
3310 // method handle's MethodType.  This macro hides the distinction.
3311 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
3312                                                 Register temp_reg) {
3313   assert_different_registers(vmslots_reg, mh_reg, temp_reg);
3314   // load mh.type.form.vmslots
3315   if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
3316     // hoist vmslots into every mh to avoid dependent load chain
3317     ld(           Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)),   vmslots_reg);
3318   } else {
3319     Register temp2_reg = vmslots_reg;
3320     load_heap_oop(Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)),      temp2_reg);
3321     load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)),        temp2_reg);
3322     ld(           Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3323   }


3376                                           Register temp_reg,
3377                                           Label& done, Label* slow_case,
3378                                           BiasedLockingCounters* counters) {
3379   assert(UseBiasedLocking, "why call this otherwise?");
3380 
3381   if (PrintBiasedLockingStatistics) {
3382     assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
3383     if (counters == NULL)
3384       counters = BiasedLocking::counters();
3385   }
3386 
3387   Label cas_label;
3388 
3389   // Biased locking
3390   // See whether the lock is currently biased toward our thread and
3391   // whether the epoch is still valid
3392   // Note that the runtime guarantees sufficient alignment of JavaThread
3393   // pointers to allow age to be placed into low bits
3394   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
3395   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3396   cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
3397 
3398   load_klass(obj_reg, temp_reg);
3399   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3400   or3(G2_thread, temp_reg, temp_reg);
3401   xor3(mark_reg, temp_reg, temp_reg);
3402   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
3403   if (counters != NULL) {
3404     cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
3405     // Reload mark_reg as we may need it later
3406     ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
3407   }
3408   brx(Assembler::equal, true, Assembler::pt, done);
3409   delayed()->nop();
3410 
3411   Label try_revoke_bias;
3412   Label try_rebias;
3413   Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
3414   assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3415 
3416   // At this point we know that the header has the bias pattern and


3443   // fails we will go in to the runtime to revoke the object's bias.
3444   // Note that we first construct the presumed unbiased header so we
3445   // don't accidentally blow away another thread's valid bias.
3446   delayed()->and3(mark_reg,
3447                   markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
3448                   mark_reg);
3449   or3(G2_thread, mark_reg, temp_reg);
3450   casn(mark_addr.base(), mark_reg, temp_reg);
3451   // If the biasing toward our thread failed, this means that
3452   // another thread succeeded in biasing it toward itself and we
3453   // need to revoke that bias. The revocation will occur in the
3454   // interpreter runtime in the slow case.
3455   cmp(mark_reg, temp_reg);
3456   if (counters != NULL) {
3457     cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
3458   }
3459   if (slow_case != NULL) {
3460     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3461     delayed()->nop();
3462   }
3463   ba_short(done);
3464 
3465   bind(try_rebias);
3466   // At this point we know the epoch has expired, meaning that the
3467   // current "bias owner", if any, is actually invalid. Under these
3468   // circumstances _only_, we are allowed to use the current header's
3469   // value as the comparison value when doing the cas to acquire the
3470   // bias in the current epoch. In other words, we allow transfer of
3471   // the bias from one thread to another directly in this situation.
3472   //
3473   // FIXME: due to a lack of registers we currently blow away the age
3474   // bits in this situation. Should attempt to preserve them.
3475   load_klass(obj_reg, temp_reg);
3476   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3477   or3(G2_thread, temp_reg, temp_reg);
3478   casn(mark_addr.base(), mark_reg, temp_reg);
3479   // If the biasing toward our thread failed, this means that
3480   // another thread succeeded in biasing it toward itself and we
3481   // need to revoke that bias. The revocation will occur in the
3482   // interpreter runtime in the slow case.
3483   cmp(mark_reg, temp_reg);
3484   if (counters != NULL) {
3485     cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
3486   }
3487   if (slow_case != NULL) {
3488     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3489     delayed()->nop();
3490   }
3491   ba_short(done);
3492 
3493   bind(try_revoke_bias);
3494   // The prototype mark in the klass doesn't have the bias bit set any
3495   // more, indicating that objects of this data type are not supposed
3496   // to be biased any more. We are going to try to reset the mark of
3497   // this object to the prototype value and fall through to the
3498   // CAS-based locking scheme. Note that if our CAS fails, it means
3499   // that another thread raced us for the privilege of revoking the
3500   // bias of this particular object, so it's okay to continue in the
3501   // normal locking code.
3502   //
3503   // FIXME: due to a lack of registers we currently blow away the age
3504   // bits in this situation. Should attempt to preserve them.
3505   load_klass(obj_reg, temp_reg);
3506   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3507   casn(mark_addr.base(), mark_reg, temp_reg);
3508   // Fall through to the normal CAS-based lock, because no matter what
3509   // the result of the above CAS, some thread must have succeeded in
3510   // removing the bias bit from the object's header.
3511   if (counters != NULL) {


3522   // Note: we do not have to check the thread ID for two reasons.
3523   // First, the interpreter checks for IllegalMonitorStateException at
3524   // a higher level. Second, if the bias was revoked while we held the
3525   // lock, the object could not be rebiased toward another thread, so
3526   // the bias bit would be clear.
3527   ld_ptr(mark_addr, temp_reg);
3528   and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3529   cmp(temp_reg, markOopDesc::biased_lock_pattern);
3530   brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
3531   delayed();
3532   if (!allow_delay_slot_filling) {
3533     nop();
3534   }
3535 }
3536 
3537 
3538 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
3539 // Solaris/SPARC's "as".  Another apt name would be cas_ptr()
3540 
3541 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
3542   casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3543 }
3544 
3545 
3546 
3547 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
3548 // of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
3549 // The code could be tightened up considerably.
3550 //
3551 // box->dhw disposition - post-conditions at DONE_LABEL.
3552 // -   Successful inflated lock:  box->dhw != 0.
3553 //     Any non-zero value suffices.
3554 //     Consider G2_thread, rsp, boxReg, or unused_mark()
3555 // -   Successful Stack-lock: box->dhw == mark.
3556 //     box->dhw must contain the displaced mark word value
3557 // -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
3558 //     The slow-path fast_enter() and slow_enter() operators
3559 //     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
3560 // -   Biased: box->dhw is undefined
3561 //
3562 // SPARC refworkload performance - specifically jetstream and scimark - is
3563 // extremely sensitive to the size of the code emitted by compiler_lock_object
3564 // and compiler_unlock_object.  Critically, the key factor is code size, not path
3565 // length.  (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
3566 // effect).
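
For orientation, the default (non-EmitSync) stack-lock fast path that compiler_lock_object() emits below can be summarized in C++ pseudocode. This is a conceptual sketch only: bias and inflation handling are omitted and raw word types stand in for markOop/BasicLock.

  bool stack_lock_sketch(intptr_t* obj_mark, intptr_t* box_dhw, intptr_t sp) {
    intptr_t mark = *obj_mark | 1 /* markOopDesc::unlocked_value */;
    // Transiently install BUSY (0); on success publish box->dhw = mark and
    // obj->mark = box, so box->dhw != 0 marks a successfully taken lock.
    if (__sync_val_compare_and_swap(obj_mark, mark, (intptr_t)0) == mark) {
      *box_dhw  = mark;
      *obj_mark = (intptr_t)box_dhw;
      return true;
    }
    // CAS failed: it is a recursive stack-lock only if the mark we fetched
    // points into our own stack; box->dhw == 0 encodes the recursive case.
    intptr_t fetched = *obj_mark;
    intptr_t delta   = (fetched - sp) & ~(intptr_t)0xfff;
    *box_dhw = delta;          // 0 => recursive; non-zero => genuine contention
    return delta == 0;
  }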
3567 
3568 
3569 void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
3570                                           Register Rbox, Register Rscratch,
3571                                           BiasedLockingCounters* counters,
3572                                           bool try_bias) {
3573    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3574 
3575    verify_oop(Roop);
3576    Label done ;
3577 
3578    if (counters != NULL) {
3579      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
3580    }
3581 
3582    if (EmitSync & 1) {
3583      mov(3, Rscratch);
3584      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3585      cmp(SP, G0);
3586      return ;
3587    }
3588 
3589    if (EmitSync & 2) {
3590 
3591      // Fetch object's markword
3592      ld_ptr(mark_addr, Rmark);
3593 
3594      if (try_bias) {
3595         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3596      }
3597 
3598      // Save Rbox in Rscratch to be used for the cas operation
3599      mov(Rbox, Rscratch);
3600 
3601      // set Rmark to markOop | markOopDesc::unlocked_value
3602      or3(Rmark, markOopDesc::unlocked_value, Rmark);
3603 
3604      // Initialize the box.  (Must happen before we update the object mark!)
3605      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3606 
3607      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
3608      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3609      casx_under_lock(mark_addr.base(), Rmark, Rscratch,
3610         (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3611 
3612      // if compare/exchange succeeded we found an unlocked object and we now have locked it
3613      // hence we are done
3614      cmp(Rmark, Rscratch);
3615 #ifdef _LP64
3616      sub(Rscratch, STACK_BIAS, Rscratch);
3617 #endif
3618      brx(Assembler::equal, false, Assembler::pt, done);
3619      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
3620 
3621      // we did not find an unlocked object so see if this is a recursive case
3622      // sub(Rscratch, SP, Rscratch);
3623      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3624      andcc(Rscratch, 0xfffff003, Rscratch);
3625      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3626      bind (done);
3627      return ;
3628    }
3629 
3630    Label Egress ;
3631 
3632    if (EmitSync & 256) {
3633       Label IsInflated ;
3634 
3635       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
3636       // Triage: biased, stack-locked, neutral, inflated
3637       if (try_bias) {
3638         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3639         // Invariant: if control reaches this point in the emitted stream
3640         // then Rmark has not been modified.
3641       }
3642 
3643       // Store mark into displaced mark field in the on-stack basic-lock "box"
3644       // Critically, this must happen before the CAS
3645       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
3646       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3647       andcc(Rmark, 2, G0);
3648       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3649       delayed()->
3650 
3651       // Try stack-lock acquisition.
3652       // Beware: the 1st instruction is in a delay slot
3653       mov(Rbox,  Rscratch);
3654       or3(Rmark, markOopDesc::unlocked_value, Rmark);
3655       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3656       casn(mark_addr.base(), Rmark, Rscratch);
3657       cmp(Rmark, Rscratch);
3658       brx(Assembler::equal, false, Assembler::pt, done);
3659       delayed()->sub(Rscratch, SP, Rscratch);
3660 
3661       // Stack-lock attempt failed - check for recursive stack-lock.
3662       // See the comments below about how we might remove this case.
3663 #ifdef _LP64
3664       sub(Rscratch, STACK_BIAS, Rscratch);
3665 #endif
3666       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3667       andcc(Rscratch, 0xfffff003, Rscratch);
3668       br(Assembler::always, false, Assembler::pt, done);
3669       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3670 
3671       bind(IsInflated);
3672       if (EmitSync & 64) {
3673          // If m->owner != null goto IsLocked
3674          // Pessimistic form: Test-and-CAS vs CAS
3675          // The optimistic form avoids RTS->RTO cache line upgrades.
3676          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3677          andcc(Rscratch, Rscratch, G0);
3678          brx(Assembler::notZero, false, Assembler::pn, done);
3679          delayed()->nop();
3680          // m->owner == null : it's unlocked.
3681       }
3682 
3683       // Try to CAS m->owner from null to Self
3684       // Invariant: if we acquire the lock then _recursions should be 0.
3685       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3686       mov(G2_thread, Rscratch);
3687       casn(Rmark, G0, Rscratch);
3688       cmp(Rscratch, G0);
3689       // Intentional fall-through into done
3690    } else {
3691       // Aggressively avoid the Store-before-CAS penalty
3692       // Defer the store into box->dhw until after the CAS
3693       Label IsInflated, Recursive ;
3694 
3695 // Anticipate CAS -- Avoid RTS->RTO upgrade
3696 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3697 
3698       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
3699       // Triage: biased, stack-locked, neutral, inflated
3700 
3701       if (try_bias) {
3702         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3703         // Invariant: if control reaches this point in the emitted stream
3704         // then Rmark has not been modified.
3705       }
3706       andcc(Rmark, 2, G0);
3707       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3708       delayed()->                         // Beware - dangling delay-slot
3709 
3710       // Try stack-lock acquisition.
3711       // Transiently install BUSY (0) encoding in the mark word.
3712       // if the CAS of 0 into the mark was successful then we execute:
3713       //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
3714       //   ST obj->mark = box    -- overwrite transient 0 value
3715       // This presumes TSO, of course.
3716 
3717       mov(0, Rscratch);
3718       or3(Rmark, markOopDesc::unlocked_value, Rmark);
3719       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3720       casn(mark_addr.base(), Rmark, Rscratch);
3721 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3722       cmp(Rscratch, Rmark);
3723       brx(Assembler::notZero, false, Assembler::pn, Recursive);
3724       delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());

3725       if (counters != NULL) {
3726         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3727       }
3728       ba(done);
3729       delayed()->st_ptr(Rbox, mark_addr);

3730 
3731       bind(Recursive);
3732       // Stack-lock attempt failed - check for recursive stack-lock.
3733       // Tests show that we can remove the recursive case with no impact
3734       // on refworkload 0.83.  If we need to reduce the size of the code
3735       // emitted by compiler_lock_object() the recursive case is perfect
3736       // emitted by compiler_lock_object() the recursive case is a perfect
3737       //
3738       // A more extreme idea is to always inflate on stack-lock recursion.
3739       // This lets us eliminate the recursive checks in compiler_lock_object
3740       // and compiler_unlock_object and the (box->dhw == 0) encoding.
3741       // A brief experiment - requiring changes to synchronizer.cpp and the
3742       // interpreter - showed a performance *increase*.  In the same experiment I eliminated
3743       // the fast-path stack-lock code from the interpreter and always passed
3744       // control to the "slow" operators in synchronizer.cpp.
3745 
3746       // RScratch contains the fetched obj->mark value from the failed CASN.
3747 #ifdef _LP64
3748       sub(Rscratch, STACK_BIAS, Rscratch);
3749 #endif
3750       sub(Rscratch, SP, Rscratch);
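      // Rscratch now holds (displaced mark - SP).  The andcc below clears the
      // page-offset bits but keeps the two low (lock) bits: a zero result
      // means the displaced mark points into our own stack, within one page
      // of SP, with a stack-lock encoding - i.e. a recursive stack-lock.
      // The zero value stored into box->dhw marks the box as recursive
      // (compiler_unlock_object treats dhw == 0 as "nothing to do"), and the
      // condition codes reaching "done" report success.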
3751       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3752       andcc(Rscratch, 0xfffff003, Rscratch);
3753       if (counters != NULL) {
3754         // Accounting needs the Rscratch register
3755         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3756         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3757         ba_short(done);

3758       } else {
3759         ba(done);
3760         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3761       }
3762 
3763       bind   (IsInflated);
3764       if (EmitSync & 64) {
3765          // If m->owner != null goto IsLocked
3766          // Test-and-CAS vs CAS
3767          // Pessimistic form avoids futile (doomed) CAS attempts
3768          // The optimistic form avoids RTS->RTO cache line upgrades.
3769          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3770          andcc(Rscratch, Rscratch, G0);
3771          brx(Assembler::notZero, false, Assembler::pn, done);
3772          delayed()->nop();
3773          // m->owner == null : it's unlocked.
3774       }
3775 
3776       // Try to CAS m->owner from null to Self
3777       // Invariant: if we acquire the lock then _recursions should be 0.
3778       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3779       mov(G2_thread, Rscratch);
3780       casn(Rmark, G0, Rscratch);
3781       cmp(Rscratch, G0);
3782       // ST box->displaced_header = NonZero.
3783       // Any non-zero value suffices:
3784       //    unused_mark(), G2_thread, RBox, RScratch, SP, etc.
3785       st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
3786       // Intentional fall-through into done
3787    }
3788 
3789    bind   (done);
3790 }
3791 
3792 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
3793                                             Register Rbox, Register Rscratch,
3794                                             bool try_bias) {
3795    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3796 
3797    Label done ;
3798 
3799    if (EmitSync & 4) {
3800      cmp(SP, G0);
3801      return ;
3802    }
3803 
3804    if (EmitSync & 8) {
3805      if (try_bias) {
3806         biased_locking_exit(mark_addr, Rscratch, done);
3807      }
3808 
3809      // Test first if it is a fast recursive unlock
3810      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3811      br_null_short(Rmark, Assembler::pt, done);
3812 
3813      // Check if it is still a lightweight lock; this is true if we see
3814      // the stack address of the basicLock in the markOop of the object
3815      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3816      casx_under_lock(mark_addr.base(), Rbox, Rmark,
3817        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3818      ba(done);
3819      delayed()->cmp(Rbox, Rmark);
3820      bind(done);
3821      return ;
3822    }
3823 
3824    // Beware ... If the aggregate size of the code emitted by CLO and CUO
3825    // is too large, performance rolls abruptly off a cliff.
3826    // This could be related to inlining policies, code cache management, or
3827    // I$ effects.
3828    Label LStacked ;
3829 
3830    if (try_bias) {
3831       // TODO: eliminate redundant LDs of obj->mark
3832       biased_locking_exit(mark_addr, Rscratch, done);
3833    }
3834 
3835    ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
3836    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3837    andcc(Rscratch, Rscratch, G0);
3838    brx(Assembler::zero, false, Assembler::pn, done);
3839    delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
3840    andcc(Rmark, 2, G0);
3841    brx(Assembler::zero, false, Assembler::pt, LStacked);
3842    delayed()->nop();
3843 
3844    // It's inflated
3845    // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
3846    // the ST of 0 into _owner which releases the lock.  This prevents loads
3847    // and stores within the critical section from reordering (floating)
3848    // past the store that releases the lock.  But TSO is a strong memory model
3849    // and that particular flavor of barrier is a noop, so we can safely elide it.
3850    // Note that we use 1-0 locking by default for the inflated case.  We
3851    // close the resultant (and rare) race by having contended threads in
3852    // monitorenter periodically poll _owner.
3853    ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3854    ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
3855    xor3(Rscratch, G2_thread, Rscratch);
3856    orcc(Rbox, Rscratch, Rbox);
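   // Rbox is zero only if the current thread owns the monitor (the xor with
   // G2_thread was zero) and recursions == 0; otherwise the fast exit cannot
   // proceed and we branch to "done" with the flags signalling the slow path.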
3857    brx(Assembler::notZero, false, Assembler::pn, done);
3858    delayed()->
3859    ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3860    ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3861    orcc(Rbox, Rscratch, G0);
3862    if (EmitSync & 65536) {
3863       Label LSucc ;
3864       brx(Assembler::notZero, false, Assembler::pn, LSucc);
3865       delayed()->nop();
3866       ba(done);
3867       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);

3868 
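      // Waiters exist (EntryList|cxq != 0).  The LSucc path drops the lock and
      // then re-checks _succ: if a successor has already been designated it
      // will take over, so we can report success (zero flag set).  Otherwise
      // we try to re-acquire the monitor with the CAS below; if we get it we
      // fall to the slow path (zero flag clear) so it can wake a waiter, and
      // if another thread grabbed it first that thread will do the wakeup.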
3869       bind(LSucc);
3870       st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3871       if (os::is_MP()) { membar (StoreLoad); }
3872       ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3873       andcc(Rscratch, Rscratch, G0);
3874       brx(Assembler::notZero, false, Assembler::pt, done);
3875       delayed()->andcc(G0, G0, G0);
3876       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3877       mov(G2_thread, Rscratch);
3878       casn(Rmark, G0, Rscratch);
3879       // invert icc.zf and goto done
3880       br_notnull(Rscratch, false, Assembler::pt, done);
3881       delayed()->cmp(G0, G0);
3882       ba(done);
3883       delayed()->cmp(G0, 1);
3884    } else {
3885       brx(Assembler::notZero, false, Assembler::pn, done);
3886       delayed()->nop();
3887       ba(done);
3888       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);

3889    }
3890 
3891    bind   (LStacked);
3892    // Consider: we could replace the expensive CAS in the exit
3893    // path with a simple ST of the displaced mark value fetched from
3894    // the on-stack basiclock box.  That admits a race where a thread T2
3895    // in the slow lock path -- inflating with monitor M -- could race a
3896    // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3897    // More precisely T1 in the stack-lock unlock path could "stomp" the
3898    // inflated mark value M installed by T2, resulting in an orphan
3899    // object monitor M and T2 becoming stranded.  We can remedy that situation
3900    // by having T2 periodically poll the object's mark word using timed wait
3901    // operations.  If T2 discovers that a stomp has occurred it vacates
3902    // the monitor M and wakes any other threads stranded on the now-orphan M.
3903    // In addition, the monitor scavenger, which performs deflation,
3904    // would also need to check for orphan monitors and stranded threads.
3905    //
3906    // Finally, inflation is also used when T2 needs to assign a hashCode
3907    // to O and O is stack-locked by T1.  The "stomp" race could cause
3908    // an assigned hashCode value to be lost.  We can avoid that condition
3909    // and provide the necessary hashCode stability invariants by ensuring
3910    // that hashCode generation is idempotent between copying GCs.
3911    // For example we could compute the hashCode of an object O as
3912    // O's heap address XOR some high quality RNG value that is refreshed
3913    // at GC-time.  The monitor scavenger would install the hashCode
3914    // found in any orphan monitors.  Again, the mechanism admits a
3915    // lost-update "stomp" WAW race but detects and recovers as needed.
3916    //
3917    // A prototype implementation showed excellent results, although
3918    // the scavenger and timeout code was rather involved.
3919 
3920    casn(mark_addr.base(), Rbox, Rscratch);
3921    cmp(Rbox, Rscratch);
3922    // Intentional fall through into done ...
3923 
3924    bind(done);
3925 }
3926 
3927 
3928 
3929 void MacroAssembler::print_CPU_state() {
3930   // %%%%% need to implement this
3931 }
3932 
3933 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3934   // %%%%% need to implement this
3935 }
3936 
3937 void MacroAssembler::push_IU_state() {
3938   // %%%%% need to implement this
3939 }
3940 
3941 
3942 void MacroAssembler::pop_IU_state() {
3943   // %%%%% need to implement this
3944 }


3960 
3961 
3962 void MacroAssembler::pop_CPU_state() {
3963   // %%%%% need to implement this
3964 }
3965 
3966 
3967 
3968 void MacroAssembler::verify_tlab() {
3969 #ifdef ASSERT
3970   if (UseTLAB && VerifyOops) {
3971     Label next, next2, ok;
3972     Register t1 = L0;
3973     Register t2 = L1;
3974     Register t3 = L2;
3975 
3976     save_frame(0);
3977     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3978     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
3979     or3(t1, t2, t3);
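    // t3 accumulates the bits of top, start and (below) end so that a single
    // alignment check at the end covers all three pointers.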
3980     cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
3981     stop("assert(top >= start)");
3982     should_not_reach_here();
3983 
3984     bind(next);
3985     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3986     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
3987     or3(t3, t2, t3);
3988     cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
3989     stop("assert(top <= end)");
3990     should_not_reach_here();
3991 
3992     bind(next2);
3993     and3(t3, MinObjAlignmentInBytesMask, t3);
3994     cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
3995     stop("assert(aligned)");
3996     should_not_reach_here();
3997 
3998     bind(ok);
3999     restore();
4000   }
4001 #endif
4002 }
4003 
4004 
4005 void MacroAssembler::eden_allocate(
4006   Register obj,                        // result: pointer to object after successful allocation
4007   Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
4008   int      con_size_in_bytes,          // object size in bytes if   known at compile time
4009   Register t1,                         // temp register
4010   Register t2,                         // temp register
4011   Label&   slow_case                   // continuation point if fast allocation fails
4012 ){
4013   // make sure arguments make sense
4014   assert_different_registers(obj, var_size_in_bytes, t1, t2);
4015   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
4016   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
4017 
4018   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
4019     // No allocation in the shared eden.
4020     ba_short(slow_case);
4021   } else {
4022     // get eden boundaries
4023     // note: we need both top & top_addr!
4024     const Register top_addr = t1;
4025     const Register end      = t2;
4026 
4027     CollectedHeap* ch = Universe::heap();
4028     set((intx)ch->top_addr(), top_addr);
4029     intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
4030     ld_ptr(top_addr, delta, end);
4031     ld_ptr(top_addr, 0, obj);
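    // obj holds the current top value while top_addr holds its address; the
    // retry loop below presumably adds the object size to obj and CASes the
    // result back into *top_addr, reloading and retrying if another thread
    // allocated first.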
4032 
4033     // try to allocate
4034     Label retry;
4035     bind(retry);
4036 #ifdef ASSERT
4037     // make sure eden top is properly aligned
4038     {
4039       Label L;
4040       btst(MinObjAlignmentInBytesMask, obj);


4134     bind(L);
4135   }
4136 #endif // ASSERT
4137 
4138   // update the tlab top pointer
4139   st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
4140   verify_tlab();
4141 }
4142 
4143 
4144 void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
4145   Register top = O0;
4146   Register t1 = G1;
4147   Register t2 = G3;
4148   Register t3 = O1;
4149   assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
4150   Label do_refill, discard_tlab;
4151 
4152   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
4153     // No allocation in the shared eden.
4154     ba_short(slow_case);
4155   }
4156 
4157   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
4158   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
4159   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
4160 
4161   // calculate amount of free space
4162   sub(t1, top, t1);
4163   srl_ptr(t1, LogHeapWordSize, t1);
4164 
4165   // Retain tlab and allocate object in shared space if
4166   // the amount free in the tlab is too large to discard.
4167   cmp(t1, t2);
4168   brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);
4169 
4170   // increment waste limit to prevent getting stuck on this slow path
4171   delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
4172   st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
4173   if (TLABStats) {
4174     // increment number of slow_allocations
4175     ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
4176     add(t2, 1, t2);
4177     stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
4178   }
4179   ba_short(try_eden);
4180 
4181   bind(discard_tlab);
4182   if (TLABStats) {
4183     // increment number of refills
4184     ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
4185     add(t2, 1, t2);
4186     stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
4187     // accumulate wastage
4188     ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
4189     add(t2, t1, t2);
4190     stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
4191   }
4192 
4193   // if tlab is currently allocated (top or end != null) then
4194   // fill [top, end + alignment_reserve) with array object
4195   br_null_short(top, Assembler::pn, do_refill);
4196 
4197   set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
4198   st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
4199   // set klass to intArrayKlass
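  // The next three instructions compute the filler int[] length: the free
  // space in words (t1), plus the alignment reserve, minus the int-array
  // header, scaled by HeapWordSize/sizeof(jint) to a jint element count, so
  // the array exactly covers the discarded [top, end + alignment_reserve).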
4200   sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
4201   add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
4202   sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
4203   st(t1, top, arrayOopDesc::length_offset_in_bytes());
4204   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
4205   ld_ptr(t2, 0, t2);
4206   // store klass last.  Concurrent GCs assume the length is valid if the
4207   // klass field is not null.
4208   store_klass(t2, top);
4209   verify_oop(top);
4210 
4211   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
4212   sub(top, t1, t1); // size of tlab's allocated portion
4213   incr_allocated_bytes(t1, t2, t3);
4214 
4215   // refill the tlab with an eden allocation
4216   bind(do_refill);
4217   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
4218   sll_ptr(t1, LogHeapWordSize, t1);
4219   // allocate new tlab, address returned in top
4220   eden_allocate(top, t1, 0, t2, t3, slow_case);
4221 
4222   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
4223   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
4224 #ifdef ASSERT
4225   // check that tlab_size (t1) is still valid
4226   {
4227     Label ok;
4228     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
4229     sll_ptr(t2, LogHeapWordSize, t2);
4230     cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
4231     stop("assert(t1 == tlab_size)");
4232     should_not_reach_here();
4233 
4234     bind(ok);
4235   }
4236 #endif // ASSERT
4237   add(top, t1, top); // t1 is tlab_size
4238   sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
4239   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
4240   verify_tlab();
4241   ba_short(retry);
4242 }
4243 
4244 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
4245                                           Register t1, Register t2) {
4246   // Bump total bytes allocated by this thread
4247   assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
4248   assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
4249   // v8 support has gone the way of the dodo
4250   ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
4251   add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
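  // ensure_simm13_or_reg presumably returns size_in_bytes unchanged when it is
  // a register or a constant fitting in a 13-bit immediate, and otherwise
  // materializes the constant into t2 first.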
4252   stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
4253 }
4254 
4255 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
4256   switch (cond) {
4257     // Note some conditions are synonyms for others
4258     case Assembler::never:                return Assembler::always;
4259     case Assembler::zero:                 return Assembler::notZero;
4260     case Assembler::lessEqual:            return Assembler::greater;
4261     case Assembler::less:                 return Assembler::greaterEqual;


4372   Label refill, restart;
4373   if (with_frame) {
4374     __ save_frame(0);
4375     pre_val = I0;  // Was O0 before the save.
4376   } else {
4377     pre_val = O0;
4378   }
4379   int satb_q_index_byte_offset =
4380     in_bytes(JavaThread::satb_mark_queue_offset() +
4381              PtrQueue::byte_offset_of_index());
4382   int satb_q_buf_byte_offset =
4383     in_bytes(JavaThread::satb_mark_queue_offset() +
4384              PtrQueue::byte_offset_of_buf());
4385   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
4386          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
4387          "check sizes in assembly below");
4388 
4389   __ bind(restart);
4390   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
4391 
4392   __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
4393   // If the branch is taken, no harm in executing this in the delay slot.
4394   __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
4395   __ sub(L0, oopSize, L0);
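  // The SATB queue index counts down in bytes from the end of the buffer: a
  // zero index (tested above) means the buffer is full and must be refilled;
  // otherwise the index is decremented by one oop slot and the pre-value is
  // stored at _buf + index.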
4396 
4397   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
4398   if (!with_frame) {
4399     // Use return-from-leaf
4400     __ retl();
4401     __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4402   } else {
4403     // Not delayed.
4404     __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4405   }
4406   if (with_frame) {
4407     __ ret();
4408     __ delayed()->restore();
4409   }
4410   __ bind(refill);
4411 
4412   address handle_zero =


4487     assert(pre_val == noreg, "check this code");
4488   }
4489 
4490   // Is marking active?
4491   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4492     ld(G2,
4493        in_bytes(JavaThread::satb_mark_queue_offset() +
4494                 PtrQueue::byte_offset_of_active()),
4495        tmp);
4496   } else {
4497     guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
4498               "Assumption");
4499     ldsb(G2,
4500          in_bytes(JavaThread::satb_mark_queue_offset() +
4501                   PtrQueue::byte_offset_of_active()),
4502          tmp);
4503   }
4504 
4505   // Check on whether to annul.
4506   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
4507   delayed()->nop();
4508 
4509   // Do we need to load the previous value?
4510   if (obj != noreg) {
4511     // Load the previous value...
4512     if (index == noreg) {
4513       if (Assembler::is_simm13(offset)) {
4514         load_heap_oop(obj, offset, tmp);
4515       } else {
4516         set(offset, tmp);
4517         load_heap_oop(obj, tmp, tmp);
4518       }
4519     } else {
4520       load_heap_oop(obj, index, tmp);
4521     }
4522     // Previous value has been loaded into tmp
4523     pre_val = tmp;
4524   }
4525 
4526   assert(pre_val != noreg, "must have a real register");
4527 
4528   // Is the previous value null?
4529   // Check on whether to annul.
4530   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
4531   delayed()->nop();
4532 
4533   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
4534   // case, pre_val will be a scratch G-reg, but there are some cases in
4535   // which it's an O-reg.  In the first case, do a normal call.  In the
4536   // latter, do a save here and call the frameless version.
4537 
4538   guarantee(pre_val->is_global() || pre_val->is_out(),
4539             "Or we need to think harder.");
4540 
4541   if (pre_val->is_global() && !preserve_o_regs) {
4542     generate_satb_log_enqueue_if_necessary(true); // with frame
4543 
4544     call(satb_log_enqueue_with_frame);
4545     delayed()->mov(pre_val, O0);
4546   } else {
4547     generate_satb_log_enqueue_if_necessary(false); // frameless
4548 
4549     save_frame(0);
4550     call(satb_log_enqueue_frameless);
4551     delayed()->mov(pre_val->after_save(), O0);

