src/cpu/sparc/vm/macroAssembler_sparc.cpp

 101 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
 102 #endif
 103 
 104 // Patch instruction inst at offset inst_pos to refer to dest_pos
 105 // and return the resulting instruction.
 106 // We should have pcs, not offsets, but since all is relative, it will work out
 107 // OK.
 108 int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) {
 109   int m; // mask for displacement field
 110   int v; // new value for displacement field
 111   const int word_aligned_ones = -4;
 112   switch (inv_op(inst)) {
 113   default: ShouldNotReachHere();
 114   case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
 115   case branch_op:
 116     switch (inv_op2(inst)) {
 117       case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 118       case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 119       case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 120       case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 121       case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 122       case bpr_op2: {
 123         if (is_cbcond(inst)) {
 124           m = wdisp10(word_aligned_ones, 0);
 125           v = wdisp10(dest_pos, inst_pos);
 126         } else {
 127           m = wdisp16(word_aligned_ones, 0);
 128           v = wdisp16(dest_pos, inst_pos);
 129         }
 130         break;
 131       }
 132       default: ShouldNotReachHere();
 133     }
 134   }
 135   return (inst & ~m) | v;
 136 }
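// A sketch of the masked update above: passing word_aligned_ones (-4) through
// wdisp() sets every bit of the displacement field, so m masks exactly that
// field for the given format.  (inst & ~m) | v therefore preserves the opcode,
// condition and prediction bits and replaces only the word displacement,
// which encodes (dest_pos - inst_pos) >> 2.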
 137 
 138 // Return the offset of the branch destination of instruction inst
 139 // at offset pos.
 140 // Should have pcs, but since all is relative, it works out.
 141 int MacroAssembler::branch_destination(int inst, int pos) {
 142   int r;
 143   switch (inv_op(inst)) {
 144   default: ShouldNotReachHere();
 145   case call_op:        r = inv_wdisp(inst, pos, 30);  break;
 146   case branch_op:
 147     switch (inv_op2(inst)) {
 148       case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
 149       case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
 150       case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 151       case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 152       case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 153       case bpr_op2: {
 154         if (is_cbcond(inst)) {
 155           r = inv_wdisp10(inst, pos);
 156         } else {
 157           r = inv_wdisp16(inst, pos);
 158         }
 159         break;
 160       }
 161       default: ShouldNotReachHere();
 162     }
 163   }
 164   return r;
 165 }
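// By construction branch_destination() inverts patched_branch(); the intended
// (unasserted) invariant is roughly
//   branch_destination(patched_branch(dest, inst, pos), pos) == dest
// for any displacement that fits the instruction's field.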
 166 
 167 void MacroAssembler::null_check(Register reg, int offset) {
 168   if (needs_explicit_null_check((intptr_t)offset)) {
 169     // provoke OS NULL exception if reg = NULL by
 170     // accessing M[reg] w/o changing any registers
 171     ld_ptr(reg, 0, G0);
 172   }


 308   } else {
 309     jmpl(a.base(), a.disp(), d);
 310   }
 311 }
 312 
 313 void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) {
 314   jumpl(addrlit, temp, G0, offset, file, line);
 315 }
 316 
 317 
 318 // Conditional breakpoint (for assertion checks in assembly code)
 319 void MacroAssembler::breakpoint_trap(Condition c, CC cc) {
 320   trap(c, cc, G0, ST_RESERVED_FOR_USER_0);
 321 }
 322 
 323 // We want to use ST_BREAKPOINT here, but the debugger is confused by it.
 324 void MacroAssembler::breakpoint_trap() {
 325   trap(ST_RESERVED_FOR_USER_0);
 326 }
 327 
 328 // flush windows (except current) using flushw instruction if available.
 329 void MacroAssembler::flush_windows() {
 330   if (VM_Version::v9_instructions_work())  flushw();
 331   else                                     flush_windows_trap();
 332 }
 333 
 334 // Write serialization page so VM thread can do a pseudo remote membar
 335 // We use the current thread pointer to calculate a thread specific
 336 // offset to write to within the page. This minimizes bus traffic
 337 // due to cache line collision.
 338 void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
 339   srl(thread, os::get_serialize_page_shift_count(), tmp2);
 340   if (Assembler::is_simm13(os::vm_page_size())) {
 341     and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2);
 342   }
 343   else {
 344     set((os::vm_page_size() - sizeof(int)), tmp1);
 345     and3(tmp2, tmp1, tmp2);
 346   }
 347   set(os::get_memory_serialize_page(), tmp1);
 348   st(G0, tmp1, tmp2);
 349 }
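// A sketch of the address computed above:
//   offset = (thread >> serialize_page_shift) & (page_size - sizeof(int))
// so each thread hashes to its own int-aligned slot in the serialization
// page, and the final st(G0, tmp1, tmp2) stores a zero at page + offset.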
 350 
 351 
 352 
 353 void MacroAssembler::enter() {
 354   Unimplemented();
 355 }
 356 
 357 void MacroAssembler::leave() {
 358   Unimplemented();
 359 }
 360 
 361 void MacroAssembler::mult(Register s1, Register s2, Register d) {
 362   if(VM_Version::v9_instructions_work()) {
 363     mulx (s1, s2, d);
 364   } else {
 365     smul (s1, s2, d);
 366   }
 367 }
 368 
 369 void MacroAssembler::mult(Register s1, int simm13a, Register d) {
 370   if(VM_Version::v9_instructions_work()) {
 371     mulx (s1, simm13a, d);
 372   } else {
 373     smul (s1, simm13a, d);
 374   }
 375 }
 376 
 377 
 378 #ifdef ASSERT
 379 void MacroAssembler::read_ccr_v8_assert(Register ccr_save) {
 380   const Register s1 = G3_scratch;
 381   const Register s2 = G4_scratch;
 382   Label get_psr_test;
 383   // Get the condition codes the V8 way.
 384   read_ccr_trap(s1);
 385   mov(ccr_save, s2);
 386   // This is a test of V8 which has icc but not xcc
 387   // so mask off the xcc bits
 388   and3(s2, 0xf, s2);
 389   // Compare condition codes from the V8 and V9 ways.
 390   subcc(s2, s1, G0);
 391   br(Assembler::notEqual, true, Assembler::pt, get_psr_test);
 392   delayed()->breakpoint_trap();
 393   bind(get_psr_test);
 394 }
 395 
 396 void MacroAssembler::write_ccr_v8_assert(Register ccr_save) {
 397   const Register s1 = G3_scratch;
 398   const Register s2 = G4_scratch;
 399   Label set_psr_test;
 400   // Write out the saved condition codes the V8 way
 401   write_ccr_trap(ccr_save, s1, s2);
 402   // Read back the condition codes using the V9 instruction
 403   rdccr(s1);
 404   mov(ccr_save, s2);
 405   // This is a test of V8 which has icc but not xcc
 406   // so mask off the xcc bits
 407   and3(s2, 0xf, s2);
 408   and3(s1, 0xf, s1);
 409   // Compare the V8 way with the V9 way.
 410   subcc(s2, s1, G0);
 411   br(Assembler::notEqual, true, Assembler::pt, set_psr_test);
 412   delayed()->breakpoint_trap();
 413   bind(set_psr_test);
 414 }
 415 #else
 416 #define read_ccr_v8_assert(x)
 417 #define write_ccr_v8_assert(x)
 418 #endif // ASSERT
 419 
 420 void MacroAssembler::read_ccr(Register ccr_save) {
 421   if (VM_Version::v9_instructions_work()) {
 422     rdccr(ccr_save);
 423     // Test code sequence used on V8.  Do not move above rdccr.
 424     read_ccr_v8_assert(ccr_save);
 425   } else {
 426     read_ccr_trap(ccr_save);
 427   }
 428 }
 429 
 430 void MacroAssembler::write_ccr(Register ccr_save) {
 431   if (VM_Version::v9_instructions_work()) {
 432     // Test code sequence used on V8.  Do not move below wrccr.
 433     write_ccr_v8_assert(ccr_save);
 434     wrccr(ccr_save);
 435   } else {
 436     const Register temp_reg1 = G3_scratch;
 437     const Register temp_reg2 = G4_scratch;
 438     write_ccr_trap(ccr_save, temp_reg1, temp_reg2);
 439   }
 440 }
 441 
 442 
 443 // Calls to C land
 444 
 445 #ifdef ASSERT
 446 // a hook for debugging
 447 static Thread* reinitialize_thread() {
 448   return ThreadLocalStorage::thread();
 449 }
 450 #else
 451 #define reinitialize_thread ThreadLocalStorage::thread
 452 #endif
 453 
 454 #ifdef ASSERT
 455 address last_get_thread = NULL;
 456 #endif
 457 
 458 // call this when G2_thread is not known to be valid
 459 void MacroAssembler::get_thread() {


1310       char b1[1024], b2[1024];
1311       sprintf(b1, "%f", val);
1312       sprintf(b2, "%f", d[last+1]);
1313       if (strcmp(b1, b2))
1314         break;
1315     }
1316     s->print("d%d", 2 * j);
1317     if ( j != last )  s->print(" - d%d", last);
1318     s->print(" = %f", val);
1319     s->fill_to(30);
1320     s->print("(0x%x)", *(int*)&val);
1321     s->fill_to(42);
1322     s->print_cr("(0x%x)", *(1 + (int*)&val));
1323     j = last + 1;
1324   }
1325   s->cr();
1326 }
1327 
1328 void RegistersForDebugging::save_registers(MacroAssembler* a) {
1329   a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0);
1330   a->flush_windows();
1331   int i;
1332   for (i = 0; i < 8; ++i) {
1333     a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, i_offset(i));
1334     a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, l_offset(i));
1335     a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i));
1336     a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i));
1337   }
1338   for (i = 0;  i < 32; ++i) {
1339     a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i));
1340   }
1341   for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
1342     a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i));
1343   }
1344 }
1345 
1346 void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
1347   for (int i = 1; i < 8;  ++i) {
1348     a->ld_ptr(r, g_offset(i), as_gRegister(i));
1349   }
1350   for (int j = 0; j < 32; ++j) {
1351     a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j));
1352   }
1353   for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) {
1354     a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k));
1355   }
1356 }
1357 
1358 
1359 // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
1360 void MacroAssembler::push_fTOS() {
1361   // %%%%%% need to implement this
1362 }
1363 
1364 // pops double TOS element from CPU stack and pushes on FPU stack
1365 void MacroAssembler::pop_fTOS() {
1366   // %%%%%% need to implement this
1367 }
1368 
1369 void MacroAssembler::empty_FPU_stack() {
1370   // %%%%%% need to implement this
1371 }
1372 
1373 void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) {


1448   // Size of set() should stay the same
1449   patchable_set((intptr_t)real_msg, O1);
1450   // Load address to call to into O7
1451   load_ptr_contents(a, O7);
1452   // Register call to verify_oop_subroutine
1453   callr(O7, G0);
1454   delayed()->nop();
1455   // recover frame size
1456   add(SP, 8*8,SP);
1457 }
1458 
1459 // side-door communication with signalHandler in os_solaris.cpp
1460 address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL };
1461 
1462 // This macro is expanded just once; it creates shared code.  Contract:
1463 // receives an oop in O0.  Must restore O0 & O7 from TLS.  Must not smash ANY
1464 // registers, including flags.  May not use a register 'save', as this blows
1465 // the high bits of the O-regs if they contain Long values.  Acts as a 'leaf'
1466 // call.
1467 void MacroAssembler::verify_oop_subroutine() {
1468   assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" );
1469 
1470   // Leaf call; no frame.
1471   Label succeed, fail, null_or_fail;
1472 
1473   // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home).
1474   // O0 is now the oop to be checked.  O7 is the return address.
1475   Register O0_obj = O0;
1476 
1477   // Save some more registers for temps.
1478   stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8);
1479   stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8);
1480   stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8);
1481   stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8);
1482 
1483   // Save flags
1484   Register O5_save_flags = O5;
1485   rdccr( O5_save_flags );
1486 
1487   { // count number of verifies
1488     Register O2_adr   = O2;
1489     Register O3_accum = O3;


1853 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
1854                            Register Rb_hi, Register Rb_low,
1855                            Register Rresult) {
1856 
1857   Label check_low_parts, done;
1858 
1859   cmp(Ra_hi, Rb_hi );  // compare hi parts
1860   br(equal, true, pt, check_low_parts);
1861   delayed()->cmp(Ra_low, Rb_low); // test low parts
1862 
1863   // And, with an unsigned comparison, it does not matter if the numbers
1864   // are negative or not.
1865   // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
1866   // The second one is bigger (unsignedly).
1867 
1868   // Other notes:  The first move in each triplet can be unconditional
1869   // (and therefore probably prefetchable).
1870   // And the equals case for the high part does not need testing,
1871   // since that triplet is reached only after finding the high halves differ.
1872 
1873   if (VM_Version::v9_instructions_work()) {
1874     mov(-1, Rresult);
1875     ba(done);  delayed()-> movcc(greater, false, icc,  1, Rresult);
1876   } else {
1877     br(less,    true, pt, done); delayed()-> set(-1, Rresult);
1878     br(greater, true, pt, done); delayed()-> set( 1, Rresult);
1879   }
1880 
1881   bind( check_low_parts );
1882 
1883   if (VM_Version::v9_instructions_work()) {
1884     mov(                               -1, Rresult);
1885     movcc(equal,           false, icc,  0, Rresult);
1886     movcc(greaterUnsigned, false, icc,  1, Rresult);
1887   } else {
1888     set(-1, Rresult);
1889     br(equal,           true, pt, done); delayed()->set( 0, Rresult);
1890     br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
1891   }
1892   bind( done );
1893 }
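// Worked example for the low-part compare above: Ra = -2 has halves
// (0xffffffff, 0xfffffffe) and Rb = -1 has (0xffffffff, 0xffffffff).
// The high halves are equal, and the *unsigned* low-half comparison gives
// 0xfffffffe < 0xffffffff, so Rresult = -1, matching -2 < -1.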
1894 
1895 void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
1896   subcc(  G0, Rlow, Rlow );
1897   subc(   G0, Rhi,  Rhi  );
1898 }
1899 
1900 void MacroAssembler::lshl( Register Rin_high,  Register Rin_low,
1901                            Register Rcount,
1902                            Register Rout_high, Register Rout_low,
1903                            Register Rtemp ) {
1904 
1905 
1906   Register Ralt_count = Rtemp;
1907   Register Rxfer_bits = Rtemp;
1908 
1909   assert( Ralt_count != Rin_high
1910       &&  Ralt_count != Rin_low
1911       &&  Ralt_count != Rcount
1912       &&  Rxfer_bits != Rin_low


2100   case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
2101   case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
2102   default:  ShouldNotReachHere();
2103   }
2104 }
2105 
2106 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2107   switch (size_in_bytes) {
2108   case  8:  st_long(src, dst); break;
2109   case  4:  st(     src, dst); break;
2110   case  2:  sth(    src, dst); break;
2111   case  1:  stb(    src, dst); break;
2112   default:  ShouldNotReachHere();
2113   }
2114 }
2115 
2116 
2117 void MacroAssembler::float_cmp( bool is_float, int unordered_result,
2118                                 FloatRegister Fa, FloatRegister Fb,
2119                                 Register Rresult) {
2120 
2121   fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb);
2122 
2123   Condition lt = unordered_result == -1 ? f_unorderedOrLess    : f_less;
2124   Condition eq =                          f_equal;
2125   Condition gt = unordered_result ==  1 ? f_unorderedOrGreater : f_greater;
2126 
2127   if (VM_Version::v9_instructions_work()) {
2128 
2129     mov(-1, Rresult);
2130     movcc(eq, true, fcc0, 0, Rresult);
2131     movcc(gt, true, fcc0, 1, Rresult);
2132 
2133   } else {
2134     Label done;
2135 
2136     set( -1, Rresult );
2137     //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
2138     fb( eq, true, pn, done);  delayed()->set(  0, Rresult );
2139     fb( gt, true, pn, done);  delayed()->set(  1, Rresult );
2140 
2141     bind (done);
2142   }
2143 }
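// The unordered_result parameter selects between the two Java comparison
// flavors: -1 yields fcmpl semantics (an unordered operand such as NaN
// compares as "less"), +1 yields fcmpg semantics (unordered compares as
// "greater").  Rresult is always one of -1, 0 or 1.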
2144 
2145 
2146 void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
2147 {
2148   if (VM_Version::v9_instructions_work()) {
2149     Assembler::fneg(w, s, d);
2150   } else {
2151     if (w == FloatRegisterImpl::S) {
2152       Assembler::fneg(w, s, d);
2153     } else if (w == FloatRegisterImpl::D) {
2154       // number() does a sanity check on the alignment.
2155       assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
2156         ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
2157 
2158       Assembler::fneg(FloatRegisterImpl::S, s, d);
2159       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2160     } else {
2161       assert(w == FloatRegisterImpl::Q, "Invalid float register width");
2162 
2163       // number() does a sanity check on the alignment.
2164       assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
2165         ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
2166 
2167       Assembler::fneg(FloatRegisterImpl::S, s, d);
2168       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2169       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
2170       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
2171     }
2172   }
2173 }
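// The V8 emulation above relies on the IEEE sign bit of a double (or quad)
// living in the most-significant word, i.e. the first single of the
// even-aligned register group: fneg that single, then fmov the remaining
// words unchanged.  fmov() and fabs() below use the same decomposition.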
2174 
2175 void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
2176 {
2177   if (VM_Version::v9_instructions_work()) {
2178     Assembler::fmov(w, s, d);
2179   } else {
2180     if (w == FloatRegisterImpl::S) {
2181       Assembler::fmov(w, s, d);
2182     } else if (w == FloatRegisterImpl::D) {
2183       // number() does a sanity check on the alignment.
2184       assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
2185         ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
2186 
2187       Assembler::fmov(FloatRegisterImpl::S, s, d);
2188       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2189     } else {
2190       assert(w == FloatRegisterImpl::Q, "Invalid float register width");
2191 
2192       // number() does a sanity check on the alignment.
2193       assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
2194         ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
2195 
2196       Assembler::fmov(FloatRegisterImpl::S, s, d);
2197       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2198       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
2199       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
2200     }
2201   }
2202 }
2203 
2204 void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
2205 {
2206   if (VM_Version::v9_instructions_work()) {
2207     Assembler::fabs(w, s, d);
2208   } else {
2209     if (w == FloatRegisterImpl::S) {
2210       Assembler::fabs(w, s, d);
2211     } else if (w == FloatRegisterImpl::D) {
2212       // number() does a sanity check on the alignment.
2213       assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
2214         ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
2215 
2216       Assembler::fabs(FloatRegisterImpl::S, s, d);
2217       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2218     } else {
2219       assert(w == FloatRegisterImpl::Q, "Invalid float register width");
2220 
2221       // number() does a sanity check on the alignment.
2222       assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
2223        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
2224 
2225       Assembler::fabs(FloatRegisterImpl::S, s, d);
2226       Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
2227       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
2228       Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
2229     }
2230   }
2231 }
2232 
2233 void MacroAssembler::save_all_globals_into_locals() {
2234   mov(G1,L1);
2235   mov(G2,L2);
2236   mov(G3,L3);
2237   mov(G4,L4);
2238   mov(G5,L5);
2239   mov(G6,L6);
2240   mov(G7,L7);
2241 }
2242 
2243 void MacroAssembler::restore_globals_from_locals() {
2244   mov(L1,G1);
2245   mov(L2,G2);
2246   mov(L3,G3);
2247   mov(L4,G4);
2248   mov(L5,G5);
2249   mov(L6,G6);
2250   mov(L7,G7);
2251 }
2252 
2253 // Use for 64 bit operation.
2254 void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
2255 {
2256   // store ptr_reg as the new top value
2257 #ifdef _LP64
2258   casx(top_ptr_reg, top_reg, ptr_reg);
2259 #else
2260   cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm);
2261 #endif // _LP64
2262 }
2263 
2264 // [RGV] This routine does not handle 64 bit operations.
2265 //       use casx_under_lock() or casx directly!!!
2266 void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
2267 {
2268   // store ptr_reg as the new top value
2269   if (VM_Version::v9_instructions_work()) {
2270     cas(top_ptr_reg, top_reg, ptr_reg);
2271   } else {
2272 
 2273     // If the register is neither an out nor a global, it is not visible
2274     // after the save.  Allocate a register for it, save its
2275     // value in the register save area (the save may not flush
2276     // registers to the save area).
2277 
2278     Register top_ptr_reg_after_save;
2279     Register top_reg_after_save;
2280     Register ptr_reg_after_save;
2281 
2282     if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) {
2283       top_ptr_reg_after_save = top_ptr_reg->after_save();
2284     } else {
2285       Address reg_save_addr = top_ptr_reg->address_in_saved_window();
2286       top_ptr_reg_after_save = L0;
2287       st(top_ptr_reg, reg_save_addr);
2288     }
2289 
2290     if (top_reg->is_out() || top_reg->is_global()) {
2291       top_reg_after_save = top_reg->after_save();
2292     } else {
2293       Address reg_save_addr = top_reg->address_in_saved_window();
2294       top_reg_after_save = L1;
2295       st(top_reg, reg_save_addr);
2296     }
2297 
2298     if (ptr_reg->is_out() || ptr_reg->is_global()) {
2299       ptr_reg_after_save = ptr_reg->after_save();
2300     } else {
2301       Address reg_save_addr = ptr_reg->address_in_saved_window();
2302       ptr_reg_after_save = L2;
2303       st(ptr_reg, reg_save_addr);
2304     }
2305 
2306     const Register& lock_reg = L3;
2307     const Register& lock_ptr_reg = L4;
2308     const Register& value_reg = L5;
2309     const Register& yield_reg = L6;
2310     const Register& yieldall_reg = L7;
2311 
2312     save_frame();
2313 
2314     if (top_ptr_reg_after_save == L0) {
2315       ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save);
2316     }
2317 
2318     if (top_reg_after_save == L1) {
2319       ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
2320     }
2321 
2322     if (ptr_reg_after_save == L2) {
2323       ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
2324     }
2325 
 2326     Label retry_get_lock;
 2327     Label not_same;
 2328     Label dont_yield;
2329 
 2330     assert(lock_addr, "lock_address should be non-null for v8");
2331     set((intptr_t)lock_addr, lock_ptr_reg);
2332     // Initialize yield counter
2333     mov(G0,yield_reg);
2334     mov(G0, yieldall_reg);
2335     set(StubRoutines::Sparc::locked, lock_reg);
2336 
2337     bind(retry_get_lock);
2338     cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
2339 
2340     if(use_call_vm) {
 2341      Untested("Need to verify global reg consistency");
2342       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
2343     } else {
2344       // Save the regs and make space for a C call
2345       save(SP, -96, SP);
2346       save_all_globals_into_locals();
2347       call(CAST_FROM_FN_PTR(address,os::yield_all));
2348       delayed()->mov(yieldall_reg, O0);
2349       restore_globals_from_locals();
2350       restore();
2351     }
2352 
2353     // reset the counter
2354     mov(G0,yield_reg);
2355     add(yieldall_reg, 1, yieldall_reg);
2356 
2357     bind(dont_yield);
2358     // try to get lock
2359     Assembler::swap(lock_ptr_reg, 0, lock_reg);
2360 
2361     // did we get the lock?
2362     cmp(lock_reg, StubRoutines::Sparc::unlocked);
2363     br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
2364     delayed()->add(yield_reg,1,yield_reg);
2365 
2366     // yes, got lock.  do we have the same top?
2367     ld(top_ptr_reg_after_save, 0, value_reg);
2368     cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
2369 
2370     // yes, same top.
2371     st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
2372     membar(Assembler::StoreStore);
2373 
2374     bind(not_same);
2375     mov(value_reg, ptr_reg_after_save);
2376     st(lock_reg, lock_ptr_reg, 0); // unlock
2377 
2378     restore();
2379   }
2380 }
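// Illustrative pseudocode for the V8 path above (a sketch, not literal code):
//   while (swap(lock_addr, locked) != unlocked)    // acquire the spin lock,
//     occasionally yield;                          //   yielding as above
//   value = *top_ptr;
//   if (value == top) { *top_ptr = ptr; membar; }  // the "compare and swap"
//   ptr = value;                                   // hand back the old value
//   *lock_addr = unlocked;                         // release the spin lock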
2381 
2382 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2383                                                       Register tmp,
2384                                                       int offset) {
2385   intptr_t value = *delayed_value_addr;
2386   if (value != 0)
2387     return RegisterOrConstant(value + offset);
2388 
2389   // load indirectly to solve generation ordering problem
2390   AddressLiteral a(delayed_value_addr);
2391   load_ptr_contents(a, tmp);
2392 
2393 #ifdef ASSERT
2394   tst(tmp);
2395   breakpoint_trap(zero, xcc);
2396 #endif
2397 
2398   if (offset != 0)
2399     add(tmp, offset, tmp);
2400 
2401   return RegisterOrConstant(tmp);


2953   // bits of the mark word are equal to the epoch bits of the
2954   // prototype header. (Note that the prototype header's epoch bits
2955   // only change at a safepoint.) If not, attempt to rebias the object
2956   // toward the current thread. Note that we must be absolutely sure
2957   // that the current epoch is invalid in order to do this because
2958   // otherwise the manipulations it performs on the mark word are
2959   // illegal.
2960   delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg);
2961   brx(Assembler::notZero, false, Assembler::pn, try_rebias);
2962 
2963   // The epoch of the current bias is still valid but we know nothing
2964   // about the owner; it might be set or it might be clear. Try to
2965   // acquire the bias of the object using an atomic operation. If this
2966   // fails we will go in to the runtime to revoke the object's bias.
2967   // Note that we first construct the presumed unbiased header so we
2968   // don't accidentally blow away another thread's valid bias.
2969   delayed()->and3(mark_reg,
2970                   markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
2971                   mark_reg);
2972   or3(G2_thread, mark_reg, temp_reg);
2973   casn(mark_addr.base(), mark_reg, temp_reg);
2974   // If the biasing toward our thread failed, this means that
2975   // another thread succeeded in biasing it toward itself and we
2976   // need to revoke that bias. The revocation will occur in the
2977   // interpreter runtime in the slow case.
2978   cmp(mark_reg, temp_reg);
2979   if (counters != NULL) {
2980     cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
2981   }
2982   if (slow_case != NULL) {
2983     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
2984     delayed()->nop();
2985   }
2986   ba_short(done);
2987 
2988   bind(try_rebias);
2989   // At this point we know the epoch has expired, meaning that the
2990   // current "bias owner", if any, is actually invalid. Under these
2991   // circumstances _only_, we are allowed to use the current header's
2992   // value as the comparison value when doing the cas to acquire the
2993   // bias in the current epoch. In other words, we allow transfer of
2994   // the bias from one thread to another directly in this situation.
2995   //
2996   // FIXME: due to a lack of registers we currently blow away the age
2997   // bits in this situation. Should attempt to preserve them.
2998   load_klass(obj_reg, temp_reg);
2999   ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
3000   or3(G2_thread, temp_reg, temp_reg);
3001   casn(mark_addr.base(), mark_reg, temp_reg);
3002   // If the biasing toward our thread failed, this means that
3003   // another thread succeeded in biasing it toward itself and we
3004   // need to revoke that bias. The revocation will occur in the
3005   // interpreter runtime in the slow case.
3006   cmp(mark_reg, temp_reg);
3007   if (counters != NULL) {
3008     cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
3009   }
3010   if (slow_case != NULL) {
3011     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3012     delayed()->nop();
3013   }
3014   ba_short(done);
3015 
3016   bind(try_revoke_bias);
3017   // The prototype mark in the klass doesn't have the bias bit set any
3018   // more, indicating that objects of this data type are not supposed
3019   // to be biased any more. We are going to try to reset the mark of
3020   // this object to the prototype value and fall through to the
3021   // CAS-based locking scheme. Note that if our CAS fails, it means
3022   // that another thread raced us for the privilege of revoking the
3023   // bias of this particular object, so it's okay to continue in the
3024   // normal locking code.
3025   //
3026   // FIXME: due to a lack of registers we currently blow away the age
3027   // bits in this situation. Should attempt to preserve them.
3028   load_klass(obj_reg, temp_reg);
3029   ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
3030   casn(mark_addr.base(), mark_reg, temp_reg);
3031   // Fall through to the normal CAS-based lock, because no matter what
3032   // the result of the above CAS, some thread must have succeeded in
3033   // removing the bias bit from the object's header.
3034   if (counters != NULL) {
3035     cmp(mark_reg, temp_reg);
3036     cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
3037   }
3038 
3039   bind(cas_label);
3040 }
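// For reference, the mark word fields manipulated above are, from the low
// bits up: lock:2, biased_lock:1, age:4, epoch:2, then the bias owner
// (JavaThread*).  biased_lock_pattern (binary 101 = 0x5) marks a biased
// header, and the epoch/owner fields identify the current bias.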
3041 
3042 void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
3043                                           bool allow_delay_slot_filling) {
3044   // Check for biased locking unlock case, which is a no-op
3045   // Note: we do not have to check the thread ID for two reasons.
3046   // First, the interpreter checks for IllegalMonitorStateException at
3047   // a higher level. Second, if the bias was revoked while we held the
3048   // lock, the object could not be rebiased toward another thread, so
3049   // the bias bit would be clear.
3050   ld_ptr(mark_addr, temp_reg);
3051   and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3052   cmp(temp_reg, markOopDesc::biased_lock_pattern);
3053   brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
3054   delayed();
3055   if (!allow_delay_slot_filling) {
3056     nop();
3057   }
3058 }
3059 
3060 
3061 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
3062 // Solaris/SPARC's "as".  Another apt name would be cas_ptr()
3063 
3064 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
3065   casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3066 }
3067 
3068 
3069 
3070 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
3071 // of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
3072 // The code could be tightened up considerably.
3073 //
3074 // box->dhw disposition - post-conditions at DONE_LABEL.
3075 // -   Successful inflated lock:  box->dhw != 0.
3076 //     Any non-zero value suffices.
3077 //     Consider G2_thread, rsp, boxReg, or unused_mark()
3078 // -   Successful Stack-lock: box->dhw == mark.
3079 //     box->dhw must contain the displaced mark word value
3080 // -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
3081 //     The slow-path fast_enter() and slow_enter() operators
3082 //     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
3083 // -   Biased: box->dhw is undefined
3084 //
3085 // SPARC refworkload performance - specifically jetstream and scimark - are
3086 // extremely sensitive to the size of the code emitted by compiler_lock_object
3087 // and compiler_unlock_object.  Critically, the key factor is code size, not path
 3088 // length.  (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
3089 // effect).


3112    if (EmitSync & 2) {
3113 
3114      // Fetch object's markword
3115      ld_ptr(mark_addr, Rmark);
3116 
3117      if (try_bias) {
3118         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3119      }
3120 
3121      // Save Rbox in Rscratch to be used for the cas operation
3122      mov(Rbox, Rscratch);
3123 
3124      // set Rmark to markOop | markOopDesc::unlocked_value
3125      or3(Rmark, markOopDesc::unlocked_value, Rmark);
3126 
3127      // Initialize the box.  (Must happen before we update the object mark!)
3128      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3129 
3130      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
3131      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3132      casx_under_lock(mark_addr.base(), Rmark, Rscratch,
3133         (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3134 
3135      // if compare/exchange succeeded we found an unlocked object and we now have locked it
3136      // hence we are done
3137      cmp(Rmark, Rscratch);
3138 #ifdef _LP64
3139      sub(Rscratch, STACK_BIAS, Rscratch);
3140 #endif
3141      brx(Assembler::equal, false, Assembler::pt, done);
3142      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
3143 
3144      // we did not find an unlocked object so see if this is a recursive case
3145      // sub(Rscratch, SP, Rscratch);
3146      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3147      andcc(Rscratch, 0xfffff003, Rscratch);
3148      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3149      bind (done);
3150      return ;
3151    }
3152 
3153    Label Egress ;


3159       // Triage: biased, stack-locked, neutral, inflated
3160       if (try_bias) {
3161         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3162         // Invariant: if control reaches this point in the emitted stream
3163         // then Rmark has not been modified.
3164       }
3165 
3166       // Store mark into displaced mark field in the on-stack basic-lock "box"
3167       // Critically, this must happen before the CAS
3168       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
3169       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3170       andcc(Rmark, 2, G0);
3171       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3172       delayed()->
3173 
3174       // Try stack-lock acquisition.
3175       // Beware: the 1st instruction is in a delay slot
3176       mov(Rbox,  Rscratch);
3177       or3(Rmark, markOopDesc::unlocked_value, Rmark);
3178       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3179       casn(mark_addr.base(), Rmark, Rscratch);
3180       cmp(Rmark, Rscratch);
3181       brx(Assembler::equal, false, Assembler::pt, done);
3182       delayed()->sub(Rscratch, SP, Rscratch);
3183 
3184       // Stack-lock attempt failed - check for recursive stack-lock.
3185       // See the comments below about how we might remove this case.
3186 #ifdef _LP64
3187       sub(Rscratch, STACK_BIAS, Rscratch);
3188 #endif
3189       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3190       andcc(Rscratch, 0xfffff003, Rscratch);
3191       br(Assembler::always, false, Assembler::pt, done);
3192       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3193 
3194       bind(IsInflated);
3195       if (EmitSync & 64) {
3196          // If m->owner != null goto IsLocked
3197          // Pessimistic form: Test-and-CAS vs CAS
3198          // The optimistic form avoids RTS->RTO cache line upgrades.
3199          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3200          andcc(Rscratch, Rscratch, G0);
3201          brx(Assembler::notZero, false, Assembler::pn, done);
3202          delayed()->nop();
3203          // m->owner == null : it's unlocked.
3204       }
3205 
3206       // Try to CAS m->owner from null to Self
3207       // Invariant: if we acquire the lock then _recursions should be 0.
3208       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3209       mov(G2_thread, Rscratch);
3210       casn(Rmark, G0, Rscratch);
3211       cmp(Rscratch, G0);
3212       // Intentional fall-through into done
3213    } else {
3214       // Aggressively avoid the Store-before-CAS penalty
3215       // Defer the store into box->dhw until after the CAS
3216       Label IsInflated, Recursive ;
3217 
3218 // Anticipate CAS -- Avoid RTS->RTO upgrade
3219 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3220 
3221       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
3222       // Triage: biased, stack-locked, neutral, inflated
3223 
3224       if (try_bias) {
3225         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3226         // Invariant: if control reaches this point in the emitted stream
3227         // then Rmark has not been modified.
3228       }
3229       andcc(Rmark, 2, G0);
3230       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3231       delayed()->                         // Beware - dangling delay-slot
3232 
3233       // Try stack-lock acquisition.
3234       // Transiently install BUSY (0) encoding in the mark word.
3235       // if the CAS of 0 into the mark was successful then we execute:
3236       //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
3237       //   ST obj->mark = box    -- overwrite transient 0 value
3238       // This presumes TSO, of course.
3239 
3240       mov(0, Rscratch);
3241       or3(Rmark, markOopDesc::unlocked_value, Rmark);
3242       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3243       casn(mark_addr.base(), Rmark, Rscratch);
3244 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3245       cmp(Rscratch, Rmark);
3246       brx(Assembler::notZero, false, Assembler::pn, Recursive);
3247       delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3248       if (counters != NULL) {
3249         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3250       }
3251       ba(done);
3252       delayed()->st_ptr(Rbox, mark_addr);
3253 
3254       bind(Recursive);
3255       // Stack-lock attempt failed - check for recursive stack-lock.
3256       // Tests show that we can remove the recursive case with no impact
3257       // on refworkload 0.83.  If we need to reduce the size of the code
 3258      // emitted by compiler_lock_object(), the recursive case is a perfect
 3259      // candidate.
3260       //
3261       // A more extreme idea is to always inflate on stack-lock recursion.
3262       // This lets us eliminate the recursive checks in compiler_lock_object
3263       // and compiler_unlock_object and the (box->dhw == 0) encoding.
 3264      // A brief experiment - requiring changes to synchronizer.cpp and the
 3265      // interpreter - showed a performance *increase*.  In the same experiment I eliminated
3266       // the fast-path stack-lock code from the interpreter and always passed
3267       // control to the "slow" operators in synchronizer.cpp.
3268 
3269       // RScratch contains the fetched obj->mark value from the failed CASN.
3270 #ifdef _LP64
3271       sub(Rscratch, STACK_BIAS, Rscratch);
3272 #endif
3273       sub(Rscratch, SP, Rscratch);
3274       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3275       andcc(Rscratch, 0xfffff003, Rscratch);
3276       if (counters != NULL) {
3277         // Accounting needs the Rscratch register
3278         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3279         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3280         ba_short(done);
3281       } else {
3282         ba(done);
3283         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3284       }
3285 
3286       bind   (IsInflated);
3287       if (EmitSync & 64) {
3288          // If m->owner != null goto IsLocked
3289          // Test-and-CAS vs CAS
3290          // Pessimistic form avoids futile (doomed) CAS attempts
3291          // The optimistic form avoids RTS->RTO cache line upgrades.
3292          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3293          andcc(Rscratch, Rscratch, G0);
3294          brx(Assembler::notZero, false, Assembler::pn, done);
3295          delayed()->nop();
3296          // m->owner == null : it's unlocked.
3297       }
3298 
3299       // Try to CAS m->owner from null to Self
3300       // Invariant: if we acquire the lock then _recursions should be 0.
3301       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3302       mov(G2_thread, Rscratch);
3303       casn(Rmark, G0, Rscratch);
3304       cmp(Rscratch, G0);
3305       // ST box->displaced_header = NonZero.
3306       // Any non-zero value suffices:
3307       //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
3308       st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
3309       // Intentional fall-through into done
3310    }
3311 
3312    bind   (done);
3313 }
3314 
3315 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
3316                                             Register Rbox, Register Rscratch,
3317                                             bool try_bias) {
3318    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3319 
3320    Label done ;
3321 
3322    if (EmitSync & 4) {
3323      cmp(SP, G0);
3324      return ;
3325    }
3326 
3327    if (EmitSync & 8) {
3328      if (try_bias) {
3329         biased_locking_exit(mark_addr, Rscratch, done);
3330      }
3331 
3332      // Test first if it is a fast recursive unlock
3333      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3334      br_null_short(Rmark, Assembler::pt, done);
3335 
 3336      // Check if it is still a lightweight lock, this is true if we see
3337      // the stack address of the basicLock in the markOop of the object
3338      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3339      casx_under_lock(mark_addr.base(), Rbox, Rmark,
3340        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3341      ba(done);
3342      delayed()->cmp(Rbox, Rmark);
3343      bind(done);
3344      return ;
3345    }
3346 
 3347    // Beware ... If the aggregate size of the code emitted by CLO and CUO
 3348    // is too large, performance rolls abruptly off a cliff.
3349    // This could be related to inlining policies, code cache management, or
3350    // I$ effects.
3351    Label LStacked ;
3352 
3353    if (try_bias) {
3354       // TODO: eliminate redundant LDs of obj->mark
3355       biased_locking_exit(mark_addr, Rscratch, done);
3356    }
3357 
3358    ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
3359    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3360    andcc(Rscratch, Rscratch, G0);


3381    delayed()->
3382    ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3383    ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3384    orcc(Rbox, Rscratch, G0);
3385    if (EmitSync & 65536) {
3386       Label LSucc ;
3387       brx(Assembler::notZero, false, Assembler::pn, LSucc);
3388       delayed()->nop();
3389       ba(done);
3390       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3391 
3392       bind(LSucc);
3393       st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3394       if (os::is_MP()) { membar (StoreLoad); }
3395       ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3396       andcc(Rscratch, Rscratch, G0);
3397       brx(Assembler::notZero, false, Assembler::pt, done);
3398       delayed()->andcc(G0, G0, G0);
3399       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3400       mov(G2_thread, Rscratch);
3401       casn(Rmark, G0, Rscratch);
3402       // invert icc.zf and goto done
3403       br_notnull(Rscratch, false, Assembler::pt, done);
3404       delayed()->cmp(G0, G0);
3405       ba(done);
3406       delayed()->cmp(G0, 1);
3407    } else {
3408       brx(Assembler::notZero, false, Assembler::pn, done);
3409       delayed()->nop();
3410       ba(done);
3411       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3412    }
3413 
3414    bind   (LStacked);
3415    // Consider: we could replace the expensive CAS in the exit
3416    // path with a simple ST of the displaced mark value fetched from
3417    // the on-stack basiclock box.  That admits a race where a thread T2
3418    // in the slow lock path -- inflating with monitor M -- could race a
3419    // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3420    // More precisely T1 in the stack-lock unlock path could "stomp" the
3421    // inflated mark value M installed by T2, resulting in an orphan


3423    // by having T2 periodically poll the object's mark word using timed wait
3424    // operations.  If T2 discovers that a stomp has occurred it vacates
3425    // the monitor M and wakes any other threads stranded on the now-orphan M.
3426    // In addition the monitor scavenger, which performs deflation,
 3427    // would also need to check for orphan monitors and stranded threads.
3428    //
3429    // Finally, inflation is also used when T2 needs to assign a hashCode
3430    // to O and O is stack-locked by T1.  The "stomp" race could cause
3431    // an assigned hashCode value to be lost.  We can avoid that condition
3432    // and provide the necessary hashCode stability invariants by ensuring
3433    // that hashCode generation is idempotent between copying GCs.
3434    // For example we could compute the hashCode of an object O as
3435    // O's heap address XOR some high quality RNG value that is refreshed
3436    // at GC-time.  The monitor scavenger would install the hashCode
3437    // found in any orphan monitors.  Again, the mechanism admits a
3438    // lost-update "stomp" WAW race but detects and recovers as needed.
3439    //
3440    // A prototype implementation showed excellent results, although
3441    // the scavenger and timeout code was rather involved.
3442 
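   // A sketch of that scheme, assuming a hypothetical per-GC salt
   // (gc_epoch_salt, refreshed at collection time):
   //   hash(O) = (intptr_t)addr(O) ^ gc_epoch_salt;
   // the scavenger would then re-install that value into orphan monitors.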
3443    casn(mark_addr.base(), Rbox, Rscratch);
3444    cmp(Rbox, Rscratch);
3445    // Intentional fall through into done ...
3446 
3447    bind(done);
3448 }
3449 
3450 
3451 
3452 void MacroAssembler::print_CPU_state() {
3453   // %%%%% need to implement this
3454 }
3455 
3456 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3457   // %%%%% need to implement this
3458 }
3459 
3460 void MacroAssembler::push_IU_state() {
3461   // %%%%% need to implement this
3462 }
3463 


3566       STOP("eden top is not properly aligned");
3567       bind(L);
3568     }
3569 #endif // ASSERT
3570     const Register free = end;
3571     sub(end, obj, free);                                   // compute amount of free space
3572     if (var_size_in_bytes->is_valid()) {
3573       // size is unknown at compile time
3574       cmp(free, var_size_in_bytes);
3575       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
3576       delayed()->add(obj, var_size_in_bytes, end);
3577     } else {
3578       // size is known at compile time
3579       cmp(free, con_size_in_bytes);
3580       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
3581       delayed()->add(obj, con_size_in_bytes, end);
3582     }
3583     // Compare obj with the value at top_addr; if still equal, swap the value of
3584     // end with the value at top_addr. If not equal, read the value at top_addr
3585     // into end.
3586     casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3587     // if someone beat us on the allocation, try again, otherwise continue
3588     cmp(obj, end);
3589     brx(Assembler::notEqual, false, Assembler::pn, retry);
 3590     delayed()->mov(end, obj);                              // nop if successful since obj == end
3591 
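    // Illustrative pseudocode for the allocation fast path above:
    //   do { obj = *top; end = obj + size; if (end > eden_end) goto slow; }
    //   while (cas(top, obj, end) != obj);   // retry if another thread won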
3592 #ifdef ASSERT
3593     // make sure eden top is properly aligned
3594     {
3595       Label L;
3596       const Register top_addr = t1;
3597 
3598       set((intx)ch->top_addr(), top_addr);
3599       ld_ptr(top_addr, 0, top_addr);
3600       btst(MinObjAlignmentInBytesMask, top_addr);
3601       br(Assembler::zero, false, Assembler::pt, L);
3602       delayed()->nop();
3603       STOP("eden top is not properly aligned");
3604       bind(L);
3605     }
3606 #endif // ASSERT

src/cpu/sparc/vm/macroAssembler_sparc.cpp (updated copy: the VM_Version::v9_instructions_work() fallback paths removed)

 101 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
 102 #endif
 103 
 104 // Patch instruction inst at offset inst_pos to refer to dest_pos
 105 // and return the resulting instruction.
 106 // We should have pcs, not offsets, but since all is relative, it will work out
 107 // OK.
 108 int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) {
 109   int m; // mask for displacement field
 110   int v; // new value for displacement field
 111   const int word_aligned_ones = -4;
 112   switch (inv_op(inst)) {
 113   default: ShouldNotReachHere();
 114   case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
 115   case branch_op:
 116     switch (inv_op2(inst)) {
 117       case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 118       case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
 119       case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 120       case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
 121       case bpr_op2: {
 122         if (is_cbcond(inst)) {
 123           m = wdisp10(word_aligned_ones, 0);
 124           v = wdisp10(dest_pos, inst_pos);
 125         } else {
 126           m = wdisp16(word_aligned_ones, 0);
 127           v = wdisp16(dest_pos, inst_pos);
 128         }
 129         break;
 130       }
 131       default: ShouldNotReachHere();
 132     }
 133   }
 134   return (inst & ~m) | v;
 135 }
 136 
 137 // Return the offset of the branch destination of instruction inst
 138 // at offset pos.
 139 // Should have pcs, but since all is relative, it works out.
 140 int MacroAssembler::branch_destination(int inst, int pos) {
 141   int r;
 142   switch (inv_op(inst)) {
 143   default: ShouldNotReachHere();
 144   case call_op:        r = inv_wdisp(inst, pos, 30);  break;
 145   case branch_op:
 146     switch (inv_op2(inst)) {
 147       case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
 148       case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
 149       case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 150       case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
 151       case bpr_op2: {
 152         if (is_cbcond(inst)) {
 153           r = inv_wdisp10(inst, pos);
 154         } else {
 155           r = inv_wdisp16(inst, pos);
 156         }
 157         break;
 158       }
 159       default: ShouldNotReachHere();
 160     }
 161   }
 162   return r;
 163 }
 164 
 165 void MacroAssembler::null_check(Register reg, int offset) {
 166   if (needs_explicit_null_check((intptr_t)offset)) {
 167     // provoke OS NULL exception if reg = NULL by
 168     // accessing M[reg] w/o changing any registers
 169     ld_ptr(reg, 0, G0);
 170   }


 306   } else {
 307     jmpl(a.base(), a.disp(), d);
 308   }
 309 }
 310 
 311 void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) {
 312   jumpl(addrlit, temp, G0, offset, file, line);
 313 }
 314 
 315 
 316 // Conditional breakpoint (for assertion checks in assembly code)
 317 void MacroAssembler::breakpoint_trap(Condition c, CC cc) {
 318   trap(c, cc, G0, ST_RESERVED_FOR_USER_0);
 319 }
 320 
 321 // We want to use ST_BREAKPOINT here, but the debugger is confused by it.
 322 void MacroAssembler::breakpoint_trap() {
 323   trap(ST_RESERVED_FOR_USER_0);
 324 }
 325 

 326 // Write serialization page so VM thread can do a pseudo remote membar
 327 // We use the current thread pointer to calculate a thread specific
 328 // offset to write to within the page. This minimizes bus traffic
 329 // due to cache line collision.
 330 void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
 331   srl(thread, os::get_serialize_page_shift_count(), tmp2);
 332   if (Assembler::is_simm13(os::vm_page_size())) {
 333     and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2);
 334   }
 335   else {
 336     set((os::vm_page_size() - sizeof(int)), tmp1);
 337     and3(tmp2, tmp1, tmp2);
 338   }
 339   set(os::get_memory_serialize_page(), tmp1);
 340   st(G0, tmp1, tmp2);
 341 }
 342 
 343 
 344 
 345 void MacroAssembler::enter() {
 346   Unimplemented();
 347 }
 348 
 349 void MacroAssembler::leave() {
 350   Unimplemented();
 351 }
 352 
 353 void MacroAssembler::mult(Register s1, Register s2, Register d) {
 354   mulx (s1, s2, d);
 355 }
 356 
 357 void MacroAssembler::mult(Register s1, int simm13a, Register d) {
 358   mulx (s1, simm13a, d);
 359 }
 360 
 361 
 362 // Calls to C land
 363 
 364 #ifdef ASSERT
 365 // a hook for debugging
 366 static Thread* reinitialize_thread() {
 367   return ThreadLocalStorage::thread();
 368 }
 369 #else
 370 #define reinitialize_thread ThreadLocalStorage::thread
 371 #endif
 372 
 373 #ifdef ASSERT
 374 address last_get_thread = NULL;
 375 #endif
 376 
 377 // call this when G2_thread is not known to be valid
 378 void MacroAssembler::get_thread() {


1229       char b1[1024], b2[1024];
1230       sprintf(b1, "%f", val);
1231       sprintf(b2, "%f", d[last+1]);
1232       if (strcmp(b1, b2))
1233         break;
1234     }
1235     s->print("d%d", 2 * j);
1236     if ( j != last )  s->print(" - d%d", last);
1237     s->print(" = %f", val);
1238     s->fill_to(30);
1239     s->print("(0x%x)", *(int*)&val);
1240     s->fill_to(42);
1241     s->print_cr("(0x%x)", *(1 + (int*)&val));
1242     j = last + 1;
1243   }
1244   s->cr();
1245 }
1246 
1247 void RegistersForDebugging::save_registers(MacroAssembler* a) {
1248   a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0);
1249   a->flushw();
1250   int i;
1251   for (i = 0; i < 8; ++i) {
1252     a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, i_offset(i));
1253     a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, l_offset(i));
1254     a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i));
1255     a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i));
1256   }
1257   for (i = 0;  i < 32; ++i) {
1258     a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i));
1259   }
1260   for (i = 0; i < 64; i += 2) {
1261     a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i));
1262   }
1263 }
1264 
1265 void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
1266   for (int i = 1; i < 8;  ++i) {
1267     a->ld_ptr(r, g_offset(i), as_gRegister(i));
1268   }
1269   for (int j = 0; j < 32; ++j) {
1270     a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j));
1271   }
1272   for (int k = 0; k < 64; k += 2) {
1273     a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k));
1274   }
1275 }
1276 
1277 
1278 // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
1279 void MacroAssembler::push_fTOS() {
1280   // %%%%%% need to implement this
1281 }
1282 
1283 // pops double TOS element from CPU stack and pushes on FPU stack
1284 void MacroAssembler::pop_fTOS() {
1285   // %%%%%% need to implement this
1286 }
1287 
1288 void MacroAssembler::empty_FPU_stack() {
1289   // %%%%%% need to implement this
1290 }
1291 
1292 void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) {


1367   // Size of set() should stay the same
1368   patchable_set((intptr_t)real_msg, O1);
1369   // Load address to call to into O7
1370   load_ptr_contents(a, O7);
1371   // Register call to verify_oop_subroutine
1372   callr(O7, G0);
1373   delayed()->nop();
1374   // recover frame size
1375   add(SP, 8*8,SP);
1376 }
1377 
1378 // side-door communication with signalHandler in os_solaris.cpp
1379 address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL };
1380 
1381 // This macro is expanded just once; it creates shared code.  Contract:
1382 // receives an oop in O0.  Must restore O0 & O7 from TLS.  Must not smash ANY
1383 // registers, including flags.  May not use a register 'save', as this blows
1384 // the high bits of the O-regs if they contain Long values.  Acts as a 'leaf'
1385 // call.
1386 void MacroAssembler::verify_oop_subroutine() {


1387   // Leaf call; no frame.
1388   Label succeed, fail, null_or_fail;
1389 
1390   // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home).
1391   // O0 is now the oop to be checked.  O7 is the return address.
1392   Register O0_obj = O0;
1393 
1394   // Save some more registers for temps.
1395   stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8);
1396   stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8);
1397   stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8);
1398   stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8);
1399 
1400   // Save flags
1401   Register O5_save_flags = O5;
1402   rdccr( O5_save_flags );
1403 
1404   { // count number of verifies
1405     Register O2_adr   = O2;
1406     Register O3_accum = O3;


1770 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
1771                            Register Rb_hi, Register Rb_low,
1772                            Register Rresult) {
1773 
1774   Label check_low_parts, done;
1775 
1776   cmp(Ra_hi, Rb_hi );  // compare hi parts
1777   br(equal, true, pt, check_low_parts);
1778   delayed()->cmp(Ra_low, Rb_low); // test low parts
1779 
1780   // And, with an unsigned comparison, it does not matter if the numbers
1781   // are negative or not.
1782   // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
1783   // The second one is bigger (unsignedly).
1784 
1785   // Other notes:  The first move in each triplet can be unconditional
1786   // (and therefore probably prefetchable).
1787   // And the equals case for the high part does not need testing,
1788   // since that triplet is reached only after finding the high halves differ.
1789 

1790   mov(-1, Rresult);
1791   ba(done);
1792   delayed()->movcc(greater, false, icc,  1, Rresult);



1793 
1794   bind(check_low_parts);
1795 

1796   mov(                               -1, Rresult);
1797   movcc(equal,           false, icc,  0, Rresult);
1798   movcc(greaterUnsigned, false, icc,  1, Rresult);
1799 
1800   bind(done);




1801 }
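// A C sketch of the comparison implemented above (hypothetical helper; the
// emitted code uses conditional moves rather than branches):
//
//   int lcmp(int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo) {
//     if (a_hi != b_hi) return (a_hi > b_hi) ? 1 : -1;  // signed high halves
//     if (a_lo == b_lo) return 0;
//     return (a_lo > b_lo) ? 1 : -1;                    // unsigned low halves
//   }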
1802 
1803 void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
1804   subcc(  G0, Rlow, Rlow );
1805   subc(   G0, Rhi,  Rhi  );
1806 }
1807 
1808 void MacroAssembler::lshl( Register Rin_high,  Register Rin_low,
1809                            Register Rcount,
1810                            Register Rout_high, Register Rout_low,
1811                            Register Rtemp ) {
1812 
1813 
1814   Register Ralt_count = Rtemp;
1815   Register Rxfer_bits = Rtemp;
1816 
1817   assert( Ralt_count != Rin_high
1818       &&  Ralt_count != Rin_low
1819       &&  Ralt_count != Rcount
1820       &&  Rxfer_bits != Rin_low


2008   case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
2009   case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
2010   default:  ShouldNotReachHere();
2011   }
2012 }
2013 
2014 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2015   switch (size_in_bytes) {
2016   case  8:  st_long(src, dst); break;
2017   case  4:  st(     src, dst); break;
2018   case  2:  sth(    src, dst); break;
2019   case  1:  stb(    src, dst); break;
2020   default:  ShouldNotReachHere();
2021   }
2022 }
2023 
2024 
2025 void MacroAssembler::float_cmp( bool is_float, int unordered_result,
2026                                 FloatRegister Fa, FloatRegister Fb,
2027                                 Register Rresult) {
2028   if (is_float) {
2029     fcmp(FloatRegisterImpl::S, fcc0, Fa, Fb);
2030   } else {
2031     fcmp(FloatRegisterImpl::D, fcc0, Fa, Fb);
2032   }


2033 
2034   if (unordered_result == 1) {
2035     mov(                                    -1, Rresult);
2036     movcc(f_equal,              true, fcc0,  0, Rresult);
2037     movcc(f_unorderedOrGreater, true, fcc0,  1, Rresult);
2038   } else {
2039     mov(                                    -1, Rresult);
2040     movcc(f_equal,              true, fcc0,  0, Rresult);
2041     movcc(f_greater,            true, fcc0,  1, Rresult);
2042   }
2043 }
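// The mapping above matches the Java fcmpg/fcmpl semantics.  A C sketch for
// unordered_result == 1, i.e. the fcmpg case (hypothetical helper):
//
//   int fcmpg(float a, float b) {
//     if (a == b) return  0;
//     if (a <  b) return -1;
//     return 1;   // a > b, or unordered (a NaN operand) counts as "greater"
//   }
//
// For unordered_result == -1 (fcmpl) the unordered case yields -1 instead.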
2044 
2045 
2046 void MacroAssembler::save_all_globals_into_locals() {
2047   mov(G1,L1);
2048   mov(G2,L2);
2049   mov(G3,L3);
2050   mov(G4,L4);
2051   mov(G5,L5);
2052   mov(G6,L6);
2053   mov(G7,L7);
2054 }
2055 
2056 void MacroAssembler::restore_globals_from_locals() {
2057   mov(L1,G1);
2058   mov(L2,G2);
2059   mov(L3,G3);
2060   mov(L4,G4);
2061   mov(L5,G5);
2062   mov(L6,G6);
2063   mov(L7,G7);
2064 }
2065 
2066 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2067                                                       Register tmp,
2068                                                       int offset) {
2069   intptr_t value = *delayed_value_addr;
2070   if (value != 0)
2071     return RegisterOrConstant(value + offset);
2072 
2073   // load indirectly to solve generation ordering problem
2074   AddressLiteral a(delayed_value_addr);
2075   load_ptr_contents(a, tmp);
2076 
2077 #ifdef ASSERT
2078   tst(tmp);
2079   breakpoint_trap(zero, xcc);
2080 #endif
2081 
2082   if (offset != 0)
2083     add(tmp, offset, tmp);
2084 
2085   return RegisterOrConstant(tmp);


2637   // bits of the mark word are equal to the epoch bits of the
2638   // prototype header. (Note that the prototype header's epoch bits
2639   // only change at a safepoint.) If not, attempt to rebias the object
2640   // toward the current thread. Note that we must be absolutely sure
2641   // that the current epoch is invalid in order to do this because
2642   // otherwise the manipulations it performs on the mark word are
2643   // illegal.
2644   delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg);
2645   brx(Assembler::notZero, false, Assembler::pn, try_rebias);
2646 
2647   // The epoch of the current bias is still valid but we know nothing
2648   // about the owner; it might be set or it might be clear. Try to
2649   // acquire the bias of the object using an atomic operation. If this
2650   // fails we will go in to the runtime to revoke the object's bias.
2651   // Note that we first construct the presumed unbiased header so we
2652   // don't accidentally blow away another thread's valid bias.
2653   delayed()->and3(mark_reg,
2654                   markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
2655                   mark_reg);
2656   or3(G2_thread, mark_reg, temp_reg);
2657   cas_ptr(mark_addr.base(), mark_reg, temp_reg);
2658   // If the biasing toward our thread failed, this means that
2659   // another thread succeeded in biasing it toward itself and we
2660   // need to revoke that bias. The revocation will occur in the
2661   // interpreter runtime in the slow case.
2662   cmp(mark_reg, temp_reg);
2663   if (counters != NULL) {
2664     cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
2665   }
2666   if (slow_case != NULL) {
2667     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
2668     delayed()->nop();
2669   }
2670   ba_short(done);
2671 
2672   bind(try_rebias);
2673   // At this point we know the epoch has expired, meaning that the
2674   // current "bias owner", if any, is actually invalid. Under these
2675   // circumstances _only_, we are allowed to use the current header's
2676   // value as the comparison value when doing the cas to acquire the
2677   // bias in the current epoch. In other words, we allow transfer of
2678   // the bias from one thread to another directly in this situation.
2679   //
2680   // FIXME: due to a lack of registers we currently blow away the age
2681   // bits in this situation. Should attempt to preserve them.
2682   load_klass(obj_reg, temp_reg);
2683   ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
2684   or3(G2_thread, temp_reg, temp_reg);
2685   cas_ptr(mark_addr.base(), mark_reg, temp_reg);
2686   // If the biasing toward our thread failed, this means that
2687   // another thread succeeded in biasing it toward itself and we
2688   // need to revoke that bias. The revocation will occur in the
2689   // interpreter runtime in the slow case.
2690   cmp(mark_reg, temp_reg);
2691   if (counters != NULL) {
2692     cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
2693   }
2694   if (slow_case != NULL) {
2695     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
2696     delayed()->nop();
2697   }
2698   ba_short(done);
2699 
2700   bind(try_revoke_bias);
2701   // The prototype mark in the klass doesn't have the bias bit set any
2702   // more, indicating that objects of this data type are not supposed
2703   // to be biased any more. We are going to try to reset the mark of
2704   // this object to the prototype value and fall through to the
2705   // CAS-based locking scheme. Note that if our CAS fails, it means
2706   // that another thread raced us for the privilege of revoking the
2707   // bias of this particular object, so it's okay to continue in the
2708   // normal locking code.
2709   //
2710   // FIXME: due to a lack of registers we currently blow away the age
2711   // bits in this situation. Should attempt to preserve them.
2712   load_klass(obj_reg, temp_reg);
2713   ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
2714   cas_ptr(mark_addr.base(), mark_reg, temp_reg);
2715   // Fall through to the normal CAS-based lock, because no matter what
2716   // the result of the above CAS, some thread must have succeeded in
2717   // removing the bias bit from the object's header.
2718   if (counters != NULL) {
2719     cmp(mark_reg, temp_reg);
2720     cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
2721   }
2722 
2723   bind(cas_label);
2724 }
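// For orientation, the low bits of the mark word tested above are laid out
// roughly as follows when an object is biased (see markOop.hpp for the
// authoritative definition):
//
//   [ JavaThread* bias owner | epoch:2 | age:4 | biased_lock:1 | lock:2 ]
//
// biased_lock_pattern (0x5) means biased_lock == 1 and lock == 01; the epoch
// bits determine whether such a bias is still considered current.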
2725 
2726 void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
2727                                           bool allow_delay_slot_filling) {
2728   // Check for biased locking unlock case, which is a no-op
2729   // Note: we do not have to check the thread ID for two reasons.
2730   // First, the interpreter checks for IllegalMonitorStateException at
2731   // a higher level. Second, if the bias was revoked while we held the
2732   // lock, the object could not be rebiased toward another thread, so
2733   // the bias bit would be clear.
2734   ld_ptr(mark_addr, temp_reg);
2735   and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
2736   cmp(temp_reg, markOopDesc::biased_lock_pattern);
2737   brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
2738   delayed();
2739   if (!allow_delay_slot_filling) {
2740     nop();
2741   }
2742 }
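// A C sketch of the test performed above (hypothetical variable names):
//
//   bool still_biased =
//       (mark & markOopDesc::biased_lock_mask_in_place) ==
//        markOopDesc::biased_lock_pattern;
//   if (still_biased) goto done;   // unlocking a biased lock is a no-op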
2743 
2744 
2745 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
2746 // of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
2747 // The code could be tightened up considerably.
2748 //
2749 // box->dhw disposition - post-conditions at DONE_LABEL.
2750 // -   Successful inflated lock:  box->dhw != 0.
2751 //     Any non-zero value suffices.
2752 //     Consider G2_thread, rsp, boxReg, or unused_mark()
2753 // -   Successful Stack-lock: box->dhw == mark.
2754 //     box->dhw must contain the displaced mark word value
2755 // -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
2756 //     The slow-path fast_enter() and slow_enter() operators
2757 //     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
2758 // -   Biased: box->dhw is undefined
2759 //
 2760 // SPARC refworkload performance - specifically jetstream and scimark - is
 2761 // extremely sensitive to the size of the code emitted by compiler_lock_object
 2762 // and compiler_unlock_object.  Critically, the key factor is code size, not path
 2763 // length.  (Simple experiments that pad CLO with unexecuted NOPs demonstrate
 2764 // the effect).


2787    if (EmitSync & 2) {
2788 
2789      // Fetch object's markword
2790      ld_ptr(mark_addr, Rmark);
2791 
2792      if (try_bias) {
2793         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2794      }
2795 
2796      // Save Rbox in Rscratch to be used for the cas operation
2797      mov(Rbox, Rscratch);
2798 
2799      // set Rmark to markOop | markOopDesc::unlocked_value
2800      or3(Rmark, markOopDesc::unlocked_value, Rmark);
2801 
2802      // Initialize the box.  (Must happen before we update the object mark!)
2803      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2804 
2805      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
2806      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2807      cas_ptr(mark_addr.base(), Rmark, Rscratch);

2808 
2809      // if compare/exchange succeeded we found an unlocked object and we now have locked it
2810      // hence we are done
2811      cmp(Rmark, Rscratch);
2812 #ifdef _LP64
2813      sub(Rscratch, STACK_BIAS, Rscratch);
2814 #endif
2815      brx(Assembler::equal, false, Assembler::pt, done);
 2816      delayed()->sub(Rscratch, SP, Rscratch);  // pull next instruction into delay slot
2817 
2818      // we did not find an unlocked object so see if this is a recursive case
2819      // sub(Rscratch, SP, Rscratch);
2820      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2821      andcc(Rscratch, 0xfffff003, Rscratch);
2822      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2823      bind (done);
2824      return ;
2825    }
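   // The andcc(Rscratch, 0xfffff003, Rscratch) test above (and in the branch
   // below) is the recursive stack-lock check.  In rough C form (hypothetical
   // names; STACK_BIAS has already been subtracted on LP64):
   //
   //   uintptr_t diff = displaced_mark - SP;
   //   bool recursive = (diff & 0xfffff003) == 0;  // mark points into our own
   //                                               // stack page, word-aligned
   //
   // The masked value is stored into box->dhw, so a recursive lock leaves the
   // zero encoding behind; the assert on os::vm_page_size() guards the 0xfff
   // portion of the constant.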
2826 
2827    Label Egress ;


2833       // Triage: biased, stack-locked, neutral, inflated
2834       if (try_bias) {
2835         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2836         // Invariant: if control reaches this point in the emitted stream
2837         // then Rmark has not been modified.
2838       }
2839 
2840       // Store mark into displaced mark field in the on-stack basic-lock "box"
2841       // Critically, this must happen before the CAS
2842       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
2843       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2844       andcc(Rmark, 2, G0);
2845       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2846       delayed()->
2847 
2848       // Try stack-lock acquisition.
2849       // Beware: the 1st instruction is in a delay slot
2850       mov(Rbox,  Rscratch);
2851       or3(Rmark, markOopDesc::unlocked_value, Rmark);
2852       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2853       cas_ptr(mark_addr.base(), Rmark, Rscratch);
2854       cmp(Rmark, Rscratch);
2855       brx(Assembler::equal, false, Assembler::pt, done);
2856       delayed()->sub(Rscratch, SP, Rscratch);
2857 
2858       // Stack-lock attempt failed - check for recursive stack-lock.
2859       // See the comments below about how we might remove this case.
2860 #ifdef _LP64
2861       sub(Rscratch, STACK_BIAS, Rscratch);
2862 #endif
2863       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2864       andcc(Rscratch, 0xfffff003, Rscratch);
2865       br(Assembler::always, false, Assembler::pt, done);
2866       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2867 
2868       bind(IsInflated);
2869       if (EmitSync & 64) {
2870          // If m->owner != null goto IsLocked
2871          // Pessimistic form: Test-and-CAS vs CAS
2872          // The optimistic form avoids RTS->RTO cache line upgrades.
2873          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
2874          andcc(Rscratch, Rscratch, G0);
2875          brx(Assembler::notZero, false, Assembler::pn, done);
2876          delayed()->nop();
2877          // m->owner == null : it's unlocked.
2878       }
2879 
2880       // Try to CAS m->owner from null to Self
2881       // Invariant: if we acquire the lock then _recursions should be 0.
2882       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
2883       mov(G2_thread, Rscratch);
2884       cas_ptr(Rmark, G0, Rscratch);
2885       cmp(Rscratch, G0);
2886       // Intentional fall-through into done
2887    } else {
2888       // Aggressively avoid the Store-before-CAS penalty
2889       // Defer the store into box->dhw until after the CAS
2890       Label IsInflated, Recursive ;
2891 
2892 // Anticipate CAS -- Avoid RTS->RTO upgrade
2893 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
2894 
2895       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
2896       // Triage: biased, stack-locked, neutral, inflated
2897 
2898       if (try_bias) {
2899         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2900         // Invariant: if control reaches this point in the emitted stream
2901         // then Rmark has not been modified.
2902       }
2903       andcc(Rmark, 2, G0);
2904       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2905       delayed()->                         // Beware - dangling delay-slot
2906 
2907       // Try stack-lock acquisition.
2908       // Transiently install BUSY (0) encoding in the mark word.
2909       // if the CAS of 0 into the mark was successful then we execute:
2910       //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
2911       //   ST obj->mark = box    -- overwrite transient 0 value
2912       // This presumes TSO, of course.
2913 
2914       mov(0, Rscratch);
2915       or3(Rmark, markOopDesc::unlocked_value, Rmark);
2916       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
2917       cas_ptr(mark_addr.base(), Rmark, Rscratch);
2918 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
2919       cmp(Rscratch, Rmark);
2920       brx(Assembler::notZero, false, Assembler::pn, Recursive);
2921       delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
2922       if (counters != NULL) {
2923         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2924       }
2925       ba(done);
2926       delayed()->st_ptr(Rbox, mark_addr);
2927 
2928       bind(Recursive);
2929       // Stack-lock attempt failed - check for recursive stack-lock.
2930       // Tests show that we can remove the recursive case with no impact
2931       // on refworkload 0.83.  If we need to reduce the size of the code
 2932       // emitted by compiler_lock_object() the recursive case is a perfect
 2933       // candidate.
2934       //
2935       // A more extreme idea is to always inflate on stack-lock recursion.
2936       // This lets us eliminate the recursive checks in compiler_lock_object
2937       // and compiler_unlock_object and the (box->dhw == 0) encoding.
 2938       // A brief experiment - requiring changes to synchronizer.cpp and the
 2939       // interpreter - showed a performance *increase*.  In that experiment I
 2940       // eliminated the fast-path stack-lock code from the interpreter and
 2941       // always passed control to the "slow" operators in synchronizer.cpp.
2942 
2943       // RScratch contains the fetched obj->mark value from the failed CAS.
2944 #ifdef _LP64
2945       sub(Rscratch, STACK_BIAS, Rscratch);
2946 #endif
2947       sub(Rscratch, SP, Rscratch);
2948       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
2949       andcc(Rscratch, 0xfffff003, Rscratch);
2950       if (counters != NULL) {
2951         // Accounting needs the Rscratch register
2952         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2953         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
2954         ba_short(done);
2955       } else {
2956         ba(done);
2957         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
2958       }
2959 
2960       bind   (IsInflated);
2961       if (EmitSync & 64) {
2962          // If m->owner != null goto IsLocked
2963          // Test-and-CAS vs CAS
2964          // Pessimistic form avoids futile (doomed) CAS attempts
2965          // The optimistic form avoids RTS->RTO cache line upgrades.
2966          ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
2967          andcc(Rscratch, Rscratch, G0);
2968          brx(Assembler::notZero, false, Assembler::pn, done);
2969          delayed()->nop();
2970          // m->owner == null : it's unlocked.
2971       }
2972 
2973       // Try to CAS m->owner from null to Self
2974       // Invariant: if we acquire the lock then _recursions should be 0.
2975       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
2976       mov(G2_thread, Rscratch);
2977       cas_ptr(Rmark, G0, Rscratch);
2978       cmp(Rscratch, G0);
2979       // ST box->displaced_header = NonZero.
2980       // Any non-zero value suffices:
2981       //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
2982       st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
2983       // Intentional fall-through into done
2984    }
2985 
2986    bind   (done);
2987 }
2988 
2989 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
2990                                             Register Rbox, Register Rscratch,
2991                                             bool try_bias) {
2992    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
2993 
2994    Label done ;
2995 
2996    if (EmitSync & 4) {
2997      cmp(SP, G0);
2998      return ;
2999    }
3000 
3001    if (EmitSync & 8) {
3002      if (try_bias) {
3003         biased_locking_exit(mark_addr, Rscratch, done);
3004      }
3005 
3006      // Test first if it is a fast recursive unlock
3007      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3008      br_null_short(Rmark, Assembler::pt, done);
3009 
 3010      // Check if it is still a lightweight lock; this is true if we see
3011      // the stack address of the basicLock in the markOop of the object
3012      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3013      cas_ptr(mark_addr.base(), Rbox, Rmark);

3014      ba(done);
3015      delayed()->cmp(Rbox, Rmark);
3016      bind(done);
3017      return ;
3018    }
3019 
 3020    // Beware ... If the aggregate size of the code emitted by CLO and CUO
 3021    // is too large, performance rolls abruptly off a cliff.
3022    // This could be related to inlining policies, code cache management, or
3023    // I$ effects.
3024    Label LStacked ;
3025 
3026    if (try_bias) {
3027       // TODO: eliminate redundant LDs of obj->mark
3028       biased_locking_exit(mark_addr, Rscratch, done);
3029    }
3030 
3031    ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
3032    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3033    andcc(Rscratch, Rscratch, G0);


3054    delayed()->
3055    ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3056    ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3057    orcc(Rbox, Rscratch, G0);
3058    if (EmitSync & 65536) {
3059       Label LSucc ;
3060       brx(Assembler::notZero, false, Assembler::pn, LSucc);
3061       delayed()->nop();
3062       ba(done);
3063       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3064 
3065       bind(LSucc);
3066       st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3067       if (os::is_MP()) { membar (StoreLoad); }
3068       ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3069       andcc(Rscratch, Rscratch, G0);
3070       brx(Assembler::notZero, false, Assembler::pt, done);
3071       delayed()->andcc(G0, G0, G0);
3072       add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3073       mov(G2_thread, Rscratch);
3074       cas_ptr(Rmark, G0, Rscratch);
3075       // invert icc.zf and goto done
3076       br_notnull(Rscratch, false, Assembler::pt, done);
3077       delayed()->cmp(G0, G0);
3078       ba(done);
3079       delayed()->cmp(G0, 1);
3080    } else {
3081       brx(Assembler::notZero, false, Assembler::pn, done);
3082       delayed()->nop();
3083       ba(done);
3084       delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3085    }
3086 
3087    bind   (LStacked);
3088    // Consider: we could replace the expensive CAS in the exit
3089    // path with a simple ST of the displaced mark value fetched from
3090    // the on-stack basiclock box.  That admits a race where a thread T2
3091    // in the slow lock path -- inflating with monitor M -- could race a
3092    // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3093    // More precisely T1 in the stack-lock unlock path could "stomp" the
3094    // inflated mark value M installed by T2, resulting in an orphan


3096    // by having T2 periodically poll the object's mark word using timed wait
3097    // operations.  If T2 discovers that a stomp has occurred it vacates
3098    // the monitor M and wakes any other threads stranded on the now-orphan M.
3099    // In addition the monitor scavenger, which performs deflation,
 3100    // would also need to check for orphan monitors and stranded threads.
3101    //
3102    // Finally, inflation is also used when T2 needs to assign a hashCode
3103    // to O and O is stack-locked by T1.  The "stomp" race could cause
3104    // an assigned hashCode value to be lost.  We can avoid that condition
3105    // and provide the necessary hashCode stability invariants by ensuring
3106    // that hashCode generation is idempotent between copying GCs.
3107    // For example we could compute the hashCode of an object O as
3108    // O's heap address XOR some high quality RNG value that is refreshed
3109    // at GC-time.  The monitor scavenger would install the hashCode
3110    // found in any orphan monitors.  Again, the mechanism admits a
3111    // lost-update "stomp" WAW race but detects and recovers as needed.
3112    //
3113    // A prototype implementation showed excellent results, although
3114    // the scavenger and timeout code was rather involved.
3115 
3116    cas_ptr(mark_addr.base(), Rbox, Rscratch);
3117    cmp(Rbox, Rscratch);
3118    // Intentional fall through into done ...
3119 
3120    bind(done);
3121 }
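// A sketch of the GC-stable hashCode scheme suggested above (hypothetical;
// not implemented in this file):
//
//   // gc_epoch_seed is a high-quality random value refreshed at each
//   // copying GC, making hash generation idempotent between GCs.
//   intptr_t hash_of(oop o) { return (intptr_t)(void*)o ^ gc_epoch_seed; }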
3122 
3123 
3124 
3125 void MacroAssembler::print_CPU_state() {
3126   // %%%%% need to implement this
3127 }
3128 
3129 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3130   // %%%%% need to implement this
3131 }
3132 
3133 void MacroAssembler::push_IU_state() {
3134   // %%%%% need to implement this
3135 }
3136 


3239       STOP("eden top is not properly aligned");
3240       bind(L);
3241     }
3242 #endif // ASSERT
3243     const Register free = end;
3244     sub(end, obj, free);                                   // compute amount of free space
3245     if (var_size_in_bytes->is_valid()) {
3246       // size is unknown at compile time
3247       cmp(free, var_size_in_bytes);
 3248       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
3249       delayed()->add(obj, var_size_in_bytes, end);
3250     } else {
3251       // size is known at compile time
3252       cmp(free, con_size_in_bytes);
 3253       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
3254       delayed()->add(obj, con_size_in_bytes, end);
3255     }
3256     // Compare obj with the value at top_addr; if still equal, swap the value of
3257     // end with the value at top_addr. If not equal, read the value at top_addr
3258     // into end.
3259     cas_ptr(top_addr, obj, end);
3260     // if someone beat us on the allocation, try again, otherwise continue
3261     cmp(obj, end);
3262     brx(Assembler::notEqual, false, Assembler::pn, retry);
 3263     delayed()->mov(end, obj);                              // nop if successful since obj == end
3264 
3265 #ifdef ASSERT
3266     // make sure eden top is properly aligned
3267     {
3268       Label L;
3269       const Register top_addr = t1;
3270 
3271       set((intx)ch->top_addr(), top_addr);
3272       ld_ptr(top_addr, 0, top_addr);
3273       btst(MinObjAlignmentInBytesMask, top_addr);
3274       br(Assembler::zero, false, Assembler::pt, L);
3275       delayed()->nop();
3276       STOP("eden top is not properly aligned");
3277       bind(L);
3278     }
3279 #endif // ASSERT
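    // In rough C form, the protocol above is a classic CAS-based bump-pointer
    // allocation loop (hypothetical names; cmpxchg(new_val, addr, expected)
    // returns the value actually seen at addr):
    //
    //   HeapWord* obj = *top_addr;
    //   for (;;) {
    //     if ((size_t)(end_of_eden - obj) < size) goto slow_case;
    //     HeapWord* new_top = obj + size;
    //     HeapWord* seen = cmpxchg(new_top, top_addr, obj);
    //     if (seen == obj) break;   // we own [obj, new_top)
    //     obj = seen;               // lost the race; retry with fresh top
    //   }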