const char* Argument::name() const {
  int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
  int num = number();
  if (num >= nofArgs)  num = nofArgs - 1;
  return argumentNames[num][is_in() ? 1 : 0];
}

void Assembler::print_instruction(int inst) {
  const char* s;
  switch (inv_op(inst)) {
  default:       s = "????"; break;
  case call_op:  s = "call"; break;
  case branch_op:
    switch (inv_op2(inst)) {
    case fb_op2:   s = "fb";  break;
    case fbp_op2:  s = "fbp"; break;
    case br_op2:   s = "br";  break;
    case bp_op2:   s = "bp";  break;
    case cb_op2:   s = "cb";  break;
    case bpr_op2: {
      if (is_cbc(inst)) {
        s = is_cxb(inst) ? "cxb" : "cwb";
      } else {
        s = "bpr";
      }
      break;
    }
    default:       s = "????"; break;
    }
  }
  ::tty->print("%s", s);
}


// Patch instruction inst at offset inst_pos to refer to dest_pos
// and return the resulting instruction.
// We should have pcs, not offsets, but since all is relative, it will work out
// OK.
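// Illustrative example (not from the original source): for a bp instruction
// at inst_pos = 0x100 branching to dest_pos = 0x140, the word displacement
// is (0x140 - 0x100) >> 2 = 0x10.  wdisp(dest_pos, inst_pos, 19) encodes that
// value in the low 19 bits, m = wdisp(word_aligned_ones, 0, 19) is all ones
// in exactly those bits (since word_aligned_ones >> 2 == -1), and
// (inst & ~m) | v splices the new displacement into the instruction while
// preserving the opcode, condition, and annul fields.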
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int m; // mask for displacement field
  int v; // new value for displacement field
  const int word_aligned_ones = -4;
  switch (inv_op(inst)) {
  default: ShouldNotReachHere();
  case call_op:    m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break;
  case branch_op:
    switch (inv_op2(inst)) {
    case fbp_op2:  m = wdisp(word_aligned_ones, 0, 19); v = wdisp(dest_pos, inst_pos, 19); break;
    case bp_op2:   m = wdisp(word_aligned_ones, 0, 19); v = wdisp(dest_pos, inst_pos, 19); break;
    case fb_op2:   m = wdisp(word_aligned_ones, 0, 22); v = wdisp(dest_pos, inst_pos, 22); break;
    case br_op2:   m = wdisp(word_aligned_ones, 0, 22); v = wdisp(dest_pos, inst_pos, 22); break;
    case cb_op2:   m = wdisp(word_aligned_ones, 0, 22); v = wdisp(dest_pos, inst_pos, 22); break;
    case bpr_op2: {
      if (is_cbc(inst)) {
        m = wdisp10(word_aligned_ones, 0);
        v = wdisp10(dest_pos, inst_pos);
      } else {
        m = wdisp16(word_aligned_ones, 0);
        v = wdisp16(dest_pos, inst_pos);
      }
      break;
    }
    default: ShouldNotReachHere();
    }
  }
  return (inst & ~m) | v;
}

// Return the offset of the branch destination of instruction inst
// at offset pos.
// Should have pcs, but since all is relative, it works out.
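// Illustrative inverse of the patching example above: inv_wdisp extracts the
// displacement bits, sign-extends them, shifts left by 2, and adds pos, so
// the bp at 0x100 yields (0x10 << 2) + 0x100 = 0x140 again.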
int Assembler::branch_destination(int inst, int pos) {
  int r;
  switch (inv_op(inst)) {
  default: ShouldNotReachHere();
  case call_op:    r = inv_wdisp(inst, pos, 30); break;
  case branch_op:
    switch (inv_op2(inst)) {
    case fbp_op2:  r = inv_wdisp(inst, pos, 19); break;
    case bp_op2:   r = inv_wdisp(inst, pos, 19); break;
    case fb_op2:   r = inv_wdisp(inst, pos, 22); break;
    case br_op2:   r = inv_wdisp(inst, pos, 22); break;
    case cb_op2:   r = inv_wdisp(inst, pos, 22); break;
    case bpr_op2: {
      if (is_cbc(inst)) {
        r = inv_wdisp10(inst, pos);
      } else {
        r = inv_wdisp16(inst, pos);
      }
      break;
    }
    default: ShouldNotReachHere();
    }
  }
  return r;
}

int AbstractAssembler::code_fill_byte() {
  return 0x00;                  // illegal instruction 0x00000000
}

Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
  switch (in) {
  case rc_z:   return equal;
  case rc_lez: return lessEqual;
  }
}


// %%% maybe get rid of [re]set_last_Java_frame
void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
  assert_not_delayed();
  Address flags(G2_thread, JavaThread::frame_anchor_offset() +
                           JavaFrameAnchor::flags_offset());
  Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());

  // Always set last_Java_pc and flags first because once last_Java_sp is
  // visible, has_last_Java_frame is true and users will look at the rest of
  // the fields.  (Note: flags should always be zero before we get here, so
  // it doesn't need to be set.)

#ifdef ASSERT
  // Verify that last_Java_pc was zeroed on return to Java
  Label PcOk;
  save_frame(0);                // to avoid clobbering O0
  ld_ptr(pc_addr, L0);
  br_null(L0, false, Assembler::pt, PcOk);
  stop("last_Java_pc not zeroed before leaving Java");
  bind(PcOk);

  // Verify that flags was zeroed on return to Java
  Label FlagsOk;
  ld(flags, L0);
  tst(L0);
  br(Assembler::zero, false, Assembler::pt, FlagsOk);
  delayed()->restore();
  stop("flags not zeroed before leaving Java");
  bind(FlagsOk);
#endif /* ASSERT */
  //
  // When returning from calling out of Java mode the frame anchor's
  // last_Java_pc will always be set to NULL.  It is set here so that,
  // if we are doing a call to native (not VM) code, we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.

  if (last_Java_pc->is_valid()) {
    st_ptr(last_Java_pc, pc_addr);
  set(badHeapWordVal, G3);
  set(badHeapWordVal, G4);
  set(badHeapWordVal, G5);
#endif

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }
}

void MacroAssembler::check_and_forward_exception(Register scratch_reg)
{
  Label L;

  check_and_handle_popframe(scratch_reg);
  check_and_handle_earlyret(scratch_reg);

  Address exception_addr(G2_thread, Thread::pending_exception_offset());
  ld_ptr(exception_addr, scratch_reg);
  br_null(scratch_reg, false, pt, L);
  // we use O7 linkage so that forward_exception_entry has the issuing PC
  call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  delayed()->nop();
  bind(L);
}


void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
}


void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

    Register O2_adr   = O2;
    Register O3_accum = O3;
    inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
  }

  Register O2_mask = O2;
  Register O3_bits = O3;
  Register O4_temp = O4;

  // mark lower end of faulting range
  assert(_verify_oop_implicit_branch[0] == NULL, "set once");
  _verify_oop_implicit_branch[0] = pc();

  // We can't check the mark oop because it could be in the process of
  // locking or unlocking while this is running.
  set(Universe::verify_oop_mask(), O2_mask);
  set(Universe::verify_oop_bits(), O3_bits);

  // assert((obj & oop_mask) == oop_bits);
  and3(O0_obj, O2_mask, O4_temp);
  cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, null_or_fail);

  if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
    // the null_or_fail case is useless; must test for null separately
    br_null(O0_obj, false, pn, succeed);
  }

  // Check the klassOop of this object for being in the right area of memory.
  // Cannot do the load in the delay slot above in case O0 is null.
  load_klass(O0_obj, O0_obj);
  // assert((klass & klass_mask) == klass_bits);
  if (Universe::verify_klass_mask() != Universe::verify_oop_mask())
    set(Universe::verify_klass_mask(), O2_mask);
  if (Universe::verify_klass_bits() != Universe::verify_oop_bits())
    set(Universe::verify_klass_bits(), O3_bits);
  and3(O0_obj, O2_mask, O4_temp);
  cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, fail);
  // Check the klass's klass
  load_klass(O0_obj, O0_obj);
  and3(O0_obj, O2_mask, O4_temp);
  cmp(O4_temp, O3_bits);
  brx(notEqual, false, pn, fail);
  delayed()->wrccr(O5_save_flags); // Restore CCR's

  // mark upper end of faulting range
  _verify_oop_implicit_branch[1] = pc();

  //-----------------------
  // all tests pass
  bind(succeed);

  // Restore prior 64-bit registers
  ldx(SP, frame::register_save_words*wordSize+STACK_BIAS+0*8, O0);
  ldx(SP, frame::register_save_words*wordSize+STACK_BIAS+1*8, O1);
  ldx(SP, frame::register_save_words*wordSize+STACK_BIAS+2*8, O2);
  ldx(SP, frame::register_save_words*wordSize+STACK_BIAS+3*8, O3);
  ldx(SP, frame::register_save_words*wordSize+STACK_BIAS+4*8, O4);
}


// ---------------------------------------------------------
Assembler::RCondition cond2rcond(Assembler::Condition c) {
  switch (c) {
  /*case zero: */
  case Assembler::equal:        return Assembler::rc_z;
  case Assembler::lessEqual:    return Assembler::rc_lez;
  case Assembler::less:         return Assembler::rc_lz;
  /*case notZero:*/
  case Assembler::notEqual:     return Assembler::rc_nz;
  case Assembler::greater:      return Assembler::rc_gz;
  case Assembler::greaterEqual: return Assembler::rc_gez;
  }
  ShouldNotReachHere();
  return Assembler::rc_z;
}
// Compares a (32-bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS.
void MacroAssembler::br_zero(Register s1, Label& L) {
  assert_not_delayed();
  if (use_cbc(L)) {
    Assembler::cbc(zero, icc, s1, 0, L);
  } else {
    tst(s1);
    br(zero, false, pt, L);
    delayed()->nop();
  }
}

// Compares a pointer register with zero and branches on null.
// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null(Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop) {
  assert_not_delayed();
  if (emit_delayed_nop && use_cbc(L)) {
    Assembler::cbc(zero, ptr_cc, s1, 0, L);
    return;
  }
#ifdef _LP64
  bpr(rc_z, a, p, s1, L);
#else
  tst(s1);
  br(zero, a, p, L);
#endif
  // Some callers can fill the delay slot.
  if (emit_delayed_nop) {
    delayed()->nop();
  }
}
  // Some callers can fill the delay slot.
  if (emit_delayed_nop) {
    delayed()->nop();
  }
}

// Compare registers and branch with nop in delay slot, or cbcond without delay slot.
void MacroAssembler::cmp_and_br(Register s1, Register s2, Condition c,
                                bool a, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbc(L)) {
    Assembler::cbc(c, icc, s1, s2, L);
  } else {
    cmp(s1, s2);
    br(c, a, p, L);
    delayed()->nop();
  }
}

void MacroAssembler::cmp_and_br(Register s1, int simm13a, Condition c,
                                bool a, Predict p, Label& L) {
  assert_not_delayed();
  if (is_simm(simm13a, 5) && use_cbc(L)) {
    Assembler::cbc(c, icc, s1, simm13a, L);
  } else {
    cmp(s1, simm13a);
    br(c, a, p, L);
    delayed()->nop();
  }
}

// Branch that tests xcc in LP64 and icc in !LP64
void MacroAssembler::cmp_and_brx(Register s1, Register s2, Condition c,
                                 bool a, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbc(L)) {
    Assembler::cbc(c, ptr_cc, s1, s2, L);
  } else {
    cmp(s1, s2);
    brx(c, a, p, L);
    delayed()->nop();
  }
}

void MacroAssembler::cmp_and_brx(Register s1, int simm13a, Condition c,
                                 bool a, Predict p, Label& L) {
  assert_not_delayed();
  if (is_simm(simm13a, 5) && use_cbc(L)) {
    Assembler::cbc(c, ptr_cc, s1, simm13a, L);
  } else {
    cmp(s1, simm13a);
    brx(c, a, p, L);
    delayed()->nop();
  }
}
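// Usage note (sketch): the helpers above emit the compact compare-and-branch
// form only when use_cbc(L) reports that the label is within cbcond range
// and, for the immediate variants, when the constant fits the 5-bit simm5
// field (is_simm(simm13a, 5)); otherwise they fall back to the classic
// three-instruction cmp + branch + nop-in-delay-slot sequence.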

// instruction sequences factored across compiler & interpreter


void MacroAssembler::lcmp(Register Ra_hi, Register Ra_low,
                          Register Rb_hi, Register Rb_low,
                          Register Rresult) {

  Label check_low_parts, done;

  cmp(Ra_hi, Rb_hi);              // compare hi parts
  br(equal, true, pt, check_low_parts);
  delayed()->cmp(Ra_low, Rb_low); // test low parts

  // And, with an unsigned comparison, it does not matter if the numbers
  // are negative or not.
  // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
  // The second one is bigger (unsignedly).

  // Other notes: The first move in each triplet can be unconditional
  // (and therefore probably prefetchable).
  // And the equals case for the high part does not need testing,
  // since that triplet is reached only after finding the high halves differ.

  if (VM_Version::v9_instructions_work()) {
    mov(-1, Rresult);
    ba(done, false);  delayed()->movcc(greater, false, icc, 1, Rresult);
  } else {
    br(less,    true, pt, done);  delayed()->set(-1, Rresult);
    br(greater, true, pt, done);  delayed()->set( 1, Rresult);
  }

  bind(check_low_parts);

  if (VM_Version::v9_instructions_work()) {
    mov(-1, Rresult);
    movcc(equal,           false, icc, 0, Rresult);
    movcc(greaterUnsigned, false, icc, 1, Rresult);
  } else {
    set(-1, Rresult);
    br(equal,           true, pt, done);  delayed()->set(0, Rresult);
    br(greaterUnsigned, true, pt, done);  delayed()->set(1, Rresult);
  }
  bind(done);
}
void MacroAssembler::lneg(Register Rhi, Register Rlow) {

  // We get the transfer bits by shifting right by 32-count the low
  // register. This is done by shifting right by 31-count and then by one
  // more to take care of the special (rare) case where count is zero
  // (shifting by 32 would not work).
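  // Illustrative trace: for count = 8 the neg below leaves Ralt_count at
  // 31 - 8 = 23, so Rxfer_bits = Rin_low >> 23 and the extra >> 1 keeps just
  // the top 8 bits of the low word -- exactly the bits that must cross into
  // the high word, which is itself shifted left by 8.  A single shift by
  // 32 - count would break for count == 0, since SPARC shift counts are
  // taken mod 32.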

  neg(Ralt_count);

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
  if (Rcount != Rout_low) {
    sll(Rin_low, Rcount, Rout_low);     // low half
  }
  sll(Rin_high, Rcount, Rout_high);
  if (Rcount == Rout_low) {
    sll(Rin_low, Rcount, Rout_low);     // low half
  }
  srl(Rxfer_bits, 1, Rxfer_bits);       // shift right by one more
  ba(done, false);
  delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);
  sll(Rin_low, Ralt_count, Rout_high);
  clr(Rout_low);

  bind(done);
}


void MacroAssembler::lshr(Register Rin_high,  Register Rin_low,
                          Register Rcount,
                          Register Rout_high, Register Rout_low,
                          Register Rtemp) {

  Register Ralt_count = Rtemp;
  Register Rxfer_bits = Rtemp;

  assert( Ralt_count != Rin_high

  // We get the transfer bits by shifting left by 32-count the high
  // register. This is done by shifting left by 31-count and then by one
  // more to take care of the special (rare) case where count is zero
  // (shifting by 32 would not work).
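  // Illustrative trace: for count = 8 the neg below leaves Ralt_count at
  // 31 - 8 = 23, so Rxfer_bits = Rin_high << 23 and the extra << 1 keeps
  // just the low 8 bits of the high word at the top of Rxfer_bits, ready to
  // be OR'ed into Rout_low (= Rin_low >> 8).  The sra on the high half
  // preserves the sign, which is what distinguishes lshr from lushr below.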

  neg(Ralt_count);
  if (Rcount != Rout_low) {
    srl(Rin_low, Rcount, Rout_low);
  }

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
  sra(Rin_high, Rcount,     Rout_high);  // high half
  sll(Rxfer_bits, 1, Rxfer_bits);        // shift left by one more
  if (Rcount == Rout_low) {
    srl(Rin_low, Rcount, Rout_low);
  }
  ba(done, false);
  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);

  sra(Rin_high, Ralt_count, Rout_low);
  sra(Rin_high, 31,         Rout_high); // sign into hi

  bind(done);
}
2441
2442
2443 void MacroAssembler::lushr( Register Rin_high, Register Rin_low,
2444 Register Rcount,
2445 Register Rout_high, Register Rout_low,
2446 Register Rtemp ) {
2447
2448 Register Ralt_count = Rtemp;
2449 Register Rxfer_bits = Rtemp;
2472
2473 // We get the transfer bits by shifting left by 32-count the high
2474 // register. This is done by shifting left by 31-count and then by one
2475 // more to take care of the special (rare) case where count is zero
2476 // (shifting by 32 would not work).
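  // Same transfer-bit trick as lshr above; only the treatment of the high
  // half differs -- srl zero-fills where lshr's sra sign-extends, and the
  // big_shift case clears Rout_high instead of replicating the sign bit.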

  neg(Ralt_count);
  if (Rcount != Rout_low) {
    srl(Rin_low, Rcount, Rout_low);
  }

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
  srl(Rin_high, Rcount,     Rout_high);  // high half
  sll(Rxfer_bits, 1, Rxfer_bits);        // shift left by one more
  if (Rcount == Rout_low) {
    srl(Rin_low, Rcount, Rout_low);
  }
  ba(done, false);
  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);

  srl(Rin_high, Ralt_count, Rout_low);
  clr(Rout_high);

  bind(done);
}
#ifdef _LP64
void MacroAssembler::lcmp(Register Ra, Register Rb, Register Rresult) {
  cmp(Ra, Rb);
  mov(-1, Rresult);
  movcc(equal,   false, xcc, 0, Rresult);
  movcc(greater, false, xcc, 1, Rresult);
}
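// Illustrative trace of the lcmp above: for Ra = 5, Rb = 3 the cmp sets xcc
// to "greater"; the unconditional mov seeds Rresult with -1, the first movcc
// does not fire (not equal), and the second overwrites Rresult with 1 --
// a branch-free -1/0/+1 three-way compare.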
#endif

    if (top_reg_after_save == L1) {
      ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
    }

    if (ptr_reg_after_save == L2) {
      ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
    }

    Label retry_get_lock;
    Label not_same;
    Label dont_yield;

    assert(lock_addr, "lock_addr should be non-null for v8");
    set((intptr_t)lock_addr, lock_ptr_reg);
    // Initialize yield counter
    mov(G0, yield_reg);
    mov(G0, yieldall_reg);
    set(StubRoutines::Sparc::locked, lock_reg);

    bind(retry_get_lock);
    cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dont_yield);

    if (use_call_vm) {
      Untested("Need to verify global reg consistency");
      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
    } else {
      // Save the regs and make space for a C call
      save(SP, -96, SP);
      save_all_globals_into_locals();
      call(CAST_FROM_FN_PTR(address, os::yield_all));
      delayed()->mov(yieldall_reg, O0);
      restore_globals_from_locals();
      restore();
    }

    // reset the counter
    mov(G0, yield_reg);
    add(yieldall_reg, 1, yieldall_reg);

    bind(dont_yield);
    // try to get lock
    swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    cmp(lock_reg, StubRoutines::Sparc::unlocked);
    br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
    delayed()->add(yield_reg, 1, yield_reg);

    // yes, got lock.  do we have the same top?
    ld(top_ptr_reg_after_save, 0, value_reg);
    cmp_and_br(value_reg, top_reg_after_save, Assembler::notEqual, false, Assembler::pt, not_same);

    // yes, same top.
    st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
    membar(Assembler::StoreStore);

    bind(not_same);
    mov(value_reg, ptr_reg_after_save);
    st(lock_reg, lock_ptr_reg, 0); // unlock

    restore();
  }
}

RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

                                   Register super_klass,
                                   Register temp_reg,
                                   Register temp2_reg,
                                   Label& L_success) {
  Label L_failure, L_pop_to_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass,
                                temp_reg, temp2_reg,
                                &L_success, &L_failure, NULL);
  Register sub_2 = sub_klass;
  Register sup_2 = super_klass;
  if (!sub_2->is_global())  sub_2 = L0;
  if (!sup_2->is_global())  sup_2 = L1;

  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
  check_klass_subtype_slow_path(sub_2, sup_2,
                                L2, L3, L4, L5,
                                NULL, &L_pop_to_failure);

  // on success:
  restore();
  ba(L_success);

  // on failure:
  bind(L_pop_to_failure);
  restore();
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());

    // super_check_offset is register.
    assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
  }
  ld_ptr(sub_klass, super_check_offset, temp_reg);
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  // Hacked ba(), which may only be used just before L_fallthrough.
#define FINAL_JUMP(label)            \
  if (&(label) != &L_fallthrough) {  \
    ba(label, false);                \
    delayed()->nop();                \
  }

  if (super_check_offset.is_register()) {
    brx(Assembler::equal, false, Assembler::pn, *L_success);
    delayed()->cmp(super_check_offset.as_register(), sc_offset);

    if (L_failure == &L_fallthrough) {
      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
      delayed()->nop();
    } else {
      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
      delayed()->nop();
      FINAL_JUMP(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      brx(Assembler::equal, false, Assembler::pt, *L_success);
      delayed()->nop();
    } else {
    // Don't use load_heap_oop; we don't want to decode the element.
    lduw(scan_temp, elem_offset, scratch_reg);
  } else {
    ld_ptr(scan_temp, elem_offset, scratch_reg);
  }

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  cmp(scratch_reg, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
  delayed()->deccc(count_temp); // decrement trip counter in delay slot

  // Falling out the bottom means we found a hit; we ARE a subtype
  if (decode_super_klass) decode_heap_oop(super_klass);

  // Success.  Cache the super we found and proceed in triumph.
  st_ptr(super_klass, sub_klass, sc_offset);

  if (L_success != &L_fallthrough) {
    ba(*L_success, false);
    delayed()->nop();
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  assert_different_registers(mtype_reg, mh_reg, temp_reg);
  // compare method type against that of the receiver
  RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
  load_heap_oop(mh_reg, mhtype_offset, temp_reg);
  cmp_and_brx(temp_reg, mtype_reg, Assembler::notEqual, false, Assembler::pn, wrong_method_type);
}
3266
3267 // A method handle has a "vmslots" field which gives the size of its
3268 // argument list in JVM stack slots. This field is either located directly
3269 // in every method handle, or else is indirectly accessed through the
3270 // method handle's MethodType. This macro hides the distinction.
3271 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
3272 Register temp_reg) {
3273 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
3274 // load mh.type.form.vmslots
3275 if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
3276 // hoist vmslots into every mh to avoid dependent load chain
3277 ld( Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3278 } else {
3279 Register temp2_reg = vmslots_reg;
3280 load_heap_oop(Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)), temp2_reg);
3281 load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)), temp2_reg);
3282 ld( Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3283 }
                                          Register temp_reg,
                                          Label& done, Label* slow_case,
                                          BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");

  if (PrintBiasedLockingStatistics) {
    assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
    if (counters == NULL)
      counters = BiasedLocking::counters();
  }

  Label cas_label;

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
  cmp_and_brx(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, false, Assembler::pn, cas_label);

  load_klass(obj_reg, temp_reg);
  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  or3(G2_thread, temp_reg, temp_reg);
  xor3(mark_reg, temp_reg, temp_reg);
  andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
  if (counters != NULL) {
    cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
    // Reload mark_reg as we may need it later
    ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
  }
  brx(Assembler::equal, true, Assembler::pt, done);
  delayed()->nop();

  Label try_revoke_bias;
  Label try_rebias;
  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
  assert(mark_addr.disp() == 0, "cas must take a zero displacement");

  // At this point we know that the header has the bias pattern and

  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  delayed()->and3(mark_reg,
                  markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
                  mark_reg);
  or3(G2_thread, mark_reg, temp_reg);
  casn(mark_addr.base(), mark_reg, temp_reg);
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias.  The revocation will occur in the
  // interpreter runtime in the slow case.
  cmp(mark_reg, temp_reg);
  if (counters != NULL) {
    cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
  }
  if (slow_case != NULL) {
    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
    delayed()->nop();
  }
  ba(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid.  Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch.  In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation.  Should attempt to preserve them.
  load_klass(obj_reg, temp_reg);
  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  or3(G2_thread, temp_reg, temp_reg);
  casn(mark_addr.base(), mark_reg, temp_reg);
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias.  The revocation will occur in the
  // interpreter runtime in the slow case.
  cmp(mark_reg, temp_reg);
  if (counters != NULL) {
    cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
  }
  if (slow_case != NULL) {
    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
    delayed()->nop();
  }
  ba(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more.  We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme.  Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation.  Should attempt to preserve them.
  load_klass(obj_reg, temp_reg);
  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  casn(mark_addr.base(), mark_reg, temp_reg);
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {

  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level.  Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ld_ptr(mark_addr, temp_reg);
  and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
  cmp(temp_reg, markOopDesc::biased_lock_pattern);
  brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
  delayed();
  if (!allow_delay_slot_filling) {
    nop();
  }
}


// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
// Solaris/SPARC's "as".  Another apt name would be cas_ptr().

void MacroAssembler::casn(Register addr_reg, Register cmp_reg, Register set_reg) {
  casx_under_lock(addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
}
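// Usage sketch: casn atomically compares *addr_reg with cmp_reg and, on a
// match, swaps set_reg with the memory word; in either case set_reg ends up
// holding the prior memory value, so callers test success with a follow-up
// cmp(cmp_reg, set_reg).  The _under_lock variant exists for pre-V9 hardware
// that lacks a native casx.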



// compiler_lock_object() and compiler_unlock_object() are direct transliterations
// of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
// The code could be tightened up considerably.
//
// box->dhw disposition - post-conditions at DONE_LABEL.
// -   Successful inflated lock:  box->dhw != 0.
//     Any non-zero value suffices.
//     Consider G2_thread, rsp, boxReg, or unused_mark()
// -   Successful Stack-lock: box->dhw == mark.
//     box->dhw must contain the displaced mark word value
// -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
//     The slow-path fast_enter() and slow_enter() operators
//     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
// -   Biased: box->dhw is undefined
//
// SPARC refworkload performance - specifically jetstream and scimark - is
// extremely sensitive to the size of the code emitted by compiler_lock_object
// and compiler_unlock_object.  Critically, the key factor is code size, not path
// length.  (Simple experiments that pad CLO with unexecuted NOPs demonstrate the
// effect.)


void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
                                          Register Rbox, Register Rscratch,
                                          BiasedLockingCounters* counters,
                                          bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  verify_oop(Roop);
  Label done;

  if (counters != NULL) {
    inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
  }

  if (EmitSync & 1) {
    mov(3, Rscratch);
    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    cmp(SP, G0);
    return;
  }

  if (EmitSync & 2) {

    // Fetch object's markword
    ld_ptr(mark_addr, Rmark);

    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
    }

    // Save Rbox in Rscratch to be used for the cas operation
    mov(Rbox, Rscratch);

    // set Rmark to markOop | markOopDesc::unlocked_value
    or3(Rmark, markOopDesc::unlocked_value, Rmark);

    // Initialize the box.  (Must happen before we update the object mark!)
    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());

    // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casx_under_lock(mark_addr.base(), Rmark, Rscratch,
                    (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());

    // if compare/exchange succeeded we found an unlocked object and we now have locked it
    // hence we are done
    cmp(Rmark, Rscratch);
#ifdef _LP64
    sub(Rscratch, STACK_BIAS, Rscratch);
#endif
    brx(Assembler::equal, false, Assembler::pt, done);
    delayed()->sub(Rscratch, SP, Rscratch); // pull next instruction into delay slot

    // we did not find an unlocked object so see if this is a recursive case
    // sub(Rscratch, SP, Rscratch);
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc(Rscratch, 0xfffff003, Rscratch);
    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    bind(done);
    return;
  }

  Label Egress;

  if (EmitSync & 256) {
    Label IsInflated;

    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
    // Triage: biased, stack-locked, neutral, inflated
    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      // Invariant: if control reaches this point in the emitted stream
      // then Rmark has not been modified.
    }

    // Store mark into displaced mark field in the on-stack basic-lock "box".
    // Critically, this must happen before the CAS.
    // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
    andcc(Rmark, 2, G0);
    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
    delayed()->

    // Try stack-lock acquisition.
    // Beware: the 1st instruction is in a delay slot
    mov(Rbox, Rscratch);
    or3(Rmark, markOopDesc::unlocked_value, Rmark);
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casn(mark_addr.base(), Rmark, Rscratch);
    cmp(Rmark, Rscratch);
    brx(Assembler::equal, false, Assembler::pt, done);
    delayed()->sub(Rscratch, SP, Rscratch);

    // Stack-lock attempt failed - check for recursive stack-lock.
    // See the comments below about how we might remove this case.
#ifdef _LP64
    sub(Rscratch, STACK_BIAS, Rscratch);
#endif
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc(Rscratch, 0xfffff003, Rscratch);
    br(Assembler::always, false, Assembler::pt, done);
    delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());

    bind(IsInflated);
    if (EmitSync & 64) {
      // If m->owner != null goto IsLocked.
      // Pessimistic form: Test-and-CAS vs CAS
      // The optimistic form avoids RTS->RTO cache line upgrades.
      ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
      andcc(Rscratch, Rscratch, G0);
      brx(Assembler::notZero, false, Assembler::pn, done);
      delayed()->nop();
      // m->owner == null : it's unlocked.
    }

    // Try to CAS m->owner from null to Self.
    // Invariant: if we acquire the lock then _recursions should be 0.
    add(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rmark);
    mov(G2_thread, Rscratch);
    casn(Rmark, G0, Rscratch);
    cmp(Rscratch, G0);
    // Intentional fall-through into done
  } else {
    // Aggressively avoid the Store-before-CAS penalty.
    // Defer the store into box->dhw until after the CAS.
    Label IsInflated, Recursive;

    // Anticipate CAS -- Avoid RTS->RTO upgrade
    // prefetch(mark_addr, Assembler::severalWritesAndPossiblyReads);

    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
    // Triage: biased, stack-locked, neutral, inflated

    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      // Invariant: if control reaches this point in the emitted stream
      // then Rmark has not been modified.
    }
    andcc(Rmark, 2, G0);
    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
    delayed()->                         // Beware - dangling delay-slot

    // Try stack-lock acquisition.
    // Transiently install BUSY (0) encoding in the mark word.
    // if the CAS of 0 into the mark was successful then we execute:
    //   ST box->dhw  = mark -- save fetched mark in on-stack basiclock box
    //   ST obj->mark = box  -- overwrite transient 0 value
    // This presumes TSO, of course.

    mov(0, Rscratch);
    or3(Rmark, markOopDesc::unlocked_value, Rmark);
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casn(mark_addr.base(), Rmark, Rscratch);
    // prefetch(mark_addr, Assembler::severalWritesAndPossiblyReads);
    cmp(Rscratch, Rmark);
    brx(Assembler::notZero, false, Assembler::pn, Recursive);
    delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
    if (counters != NULL) {
      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
    }
    br(Assembler::always, false, Assembler::pt, done);
    delayed()->st_ptr(Rbox, mark_addr);
    bind(Recursive);
    // Stack-lock attempt failed - check for recursive stack-lock.
    // Tests show that we can remove the recursive case with no impact
    // on refworkload 0.83.  If we need to reduce the size of the code
    // emitted by compiler_lock_object() the recursive case is a perfect
    // candidate.
    //
    // A more extreme idea is to always inflate on stack-lock recursion.
    // This lets us eliminate the recursive checks in compiler_lock_object
    // and compiler_unlock_object and the (box->dhw == 0) encoding.
    // A brief experiment - requiring changes to synchronizer.cpp and the
    // interpreter - showed a performance *increase*.  In the same experiment
    // I eliminated the fast-path stack-lock code from the interpreter and
    // always passed control to the "slow" operators in synchronizer.cpp.

    // RScratch contains the fetched obj->mark value from the failed CASN.
#ifdef _LP64
    sub(Rscratch, STACK_BIAS, Rscratch);
#endif
    sub(Rscratch, SP, Rscratch);
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc(Rscratch, 0xfffff003, Rscratch);
    if (counters != NULL) {
      // Accounting needs the Rscratch register
      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
      br(Assembler::always, false, Assembler::pt, done);
      delayed()->nop();
    } else {
      br(Assembler::always, false, Assembler::pt, done);
      delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    }

    bind(IsInflated);
    if (EmitSync & 64) {
      // If m->owner != null goto IsLocked.
      // Test-and-CAS vs CAS
      // Pessimistic form avoids futile (doomed) CAS attempts.
      // The optimistic form avoids RTS->RTO cache line upgrades.
      ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
      andcc(Rscratch, Rscratch, G0);
      brx(Assembler::notZero, false, Assembler::pn, done);
      delayed()->nop();
      // m->owner == null : it's unlocked.
    }

    // Try to CAS m->owner from null to Self.
    // Invariant: if we acquire the lock then _recursions should be 0.
    add(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rmark);
    mov(G2_thread, Rscratch);
    casn(Rmark, G0, Rscratch);
    cmp(Rscratch, G0);
    // ST box->displaced_header = NonZero.
    // Any non-zero value suffices:
    //   unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
    st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
    // Intentional fall-through into done
  }

  bind(done);
}

void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
                                            Register Rbox, Register Rscratch,
                                            bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  Label done;

  if (EmitSync & 4) {
    cmp(SP, G0);
    return;
  }

  if (EmitSync & 8) {
    if (try_bias) {
      biased_locking_exit(mark_addr, Rscratch, done);
    }

    // Test first if it is a fast recursive unlock
    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
    br_null(Rmark, false, Assembler::pt, done);

    // Check if it is still a lightweight lock, which is true if we see
    // the stack address of the basicLock in the markOop of the object
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casx_under_lock(mark_addr.base(), Rbox, Rmark,
                    (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    ba(done, false);
    delayed()->cmp(Rbox, Rmark);
    bind(done);
    return;
  }

  // Beware ... If the aggregate size of the code emitted by CLO and CUO is
  // too large, performance rolls abruptly off a cliff.
  // This could be related to inlining policies, code cache management, or
  // I$ effects.
  Label LStacked;

  if (try_bias) {
    // TODO: eliminate redundant LDs of obj->mark
    biased_locking_exit(mark_addr, Rscratch, done);
  }

  ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
  ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
  andcc(Rscratch, Rscratch, G0);
  brx(Assembler::zero, false, Assembler::pn, done);
  delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
  andcc(Rmark, 2, G0);
  brx(Assembler::zero, false, Assembler::pt, LStacked);
  delayed()->nop();

  // It's inflated.
  // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
  // the ST of 0 into _owner which releases the lock.  This prevents loads
  // and stores within the critical section from reordering (floating)
  // past the store that releases the lock.  But TSO is a strong memory model
  // and that particular flavor of barrier is a noop, so we can safely elide it.
  // Note that we use 1-0 locking by default for the inflated case.  We
  // close the resultant (and rare) race by having contending threads in
  // monitorenter periodically poll _owner.
  ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
  ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
  xor3(Rscratch, G2_thread, Rscratch);
  orcc(Rbox, Rscratch, Rbox);
  brx(Assembler::notZero, false, Assembler::pn, done);
  delayed()->ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
  ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
  orcc(Rbox, Rscratch, G0);
  if (EmitSync & 65536) {
    Label LSucc;
    brx(Assembler::notZero, false, Assembler::pn, LSucc);
    delayed()->nop();
    ba(done, false);
    delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);

    bind(LSucc);
    st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
    if (os::is_MP()) { membar(StoreLoad); }
    ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
    andcc(Rscratch, Rscratch, G0);
    brx(Assembler::notZero, false, Assembler::pt, done);
    delayed()->andcc(G0, G0, G0);
    add(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rmark);
    mov(G2_thread, Rscratch);
    casn(Rmark, G0, Rscratch);
    // invert icc.zf and goto done
    br_notnull(Rscratch, false, Assembler::pt, done, false);
    delayed()->cmp(G0, G0);
    ba(done, false);
    delayed()->cmp(G0, 1);
  } else {
    brx(Assembler::notZero, false, Assembler::pn, done);
    delayed()->nop();
    ba(done, false);
    delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
  }

  bind(LStacked);
  // Consider: we could replace the expensive CAS in the exit
  // path with a simple ST of the displaced mark value fetched from
  // the on-stack basiclock box.  That admits a race where a thread T2
  // in the slow lock path -- inflating with monitor M -- could race a
  // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
  // More precisely T1 in the stack-lock unlock path could "stomp" the
  // inflated mark value M installed by T2, resulting in an orphan
  // object monitor M and T2 becoming stranded.  We can remedy that situation
  // by having T2 periodically poll the object's mark word using timed wait
  // operations.  If T2 discovers that a stomp has occurred it vacates
  // the monitor M and wakes any other threads stranded on the now-orphan M.
  // In addition the monitor scavenger, which performs deflation,
  // would also need to check for orphan monitors and stranded threads.
  //
  // Finally, inflation is also used when T2 needs to assign a hashCode
  // to O and O is stack-locked by T1.  The "stomp" race could cause
  // an assigned hashCode value to be lost.  We can avoid that condition
  // and provide the necessary hashCode stability invariants by ensuring
  // that hashCode generation is idempotent between copying GCs.
  // For example we could compute the hashCode of an object O as
  // O's heap address XOR some high quality RNG value that is refreshed
  // at GC-time.  The monitor scavenger would install the hashCode
  // found in any orphan monitors.  Again, the mechanism admits a
  // lost-update "stomp" WAW race but detects and recovers as needed.
  //
  // A prototype implementation showed excellent results, although
  // the scavenger and timeout code was rather involved.
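  // A minimal sketch of the idempotent-hash idea above (an assumption, not
  // the shipped implementation):
  //   hash(O) = (uintptr_t)heap_address(O) ^ gc_epoch_seed
  // where gc_epoch_seed is a high-quality random value refreshed at each
  // copying GC; the scavenger re-installs whatever hash it finds in orphan
  // monitors, so racing writers converge on the same value.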

  casn(mark_addr.base(), Rbox, Rscratch);
  cmp(Rbox, Rscratch);
  // Intentional fall through into done ...

  bind(done);
}



void MacroAssembler::print_CPU_state() {
  // %%%%% need to implement this
}

void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  // %%%%% need to implement this
}

void MacroAssembler::push_IU_state() {
  // %%%%% need to implement this
}


void MacroAssembler::pop_IU_state() {
  // %%%%% need to implement this
}


void MacroAssembler::pop_CPU_state() {
  // %%%%% need to implement this
}



void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, next2, ok;
    Register t1 = L0;
    Register t2 = L1;
    Register t3 = L2;

    save_frame(0);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
    or3(t1, t2, t3);
    cmp_and_br(t1, t2, Assembler::greaterEqual, false, Assembler::pn, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
    or3(t3, t2, t3);
    cmp_and_br(t1, t2, Assembler::lessEqual, false, Assembler::pn, next2);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(next2);
    and3(t3, MinObjAlignmentInBytesMask, t3);
    cmp_and_br(t3, 0, Assembler::lessEqual, false, Assembler::pn, ok);
    stop("assert(aligned)");
    should_not_reach_here();

    bind(ok);
    restore();
  }
#endif
}


void MacroAssembler::eden_allocate(
  Register obj,                      // result: pointer to object after successful allocation
  Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,        // object size in bytes if   known at compile time
  Register t1,                       // temp register
  Register t2,                       // temp register
  Label&   slow_case                 // continuation point if fast allocation fails
) {
  // make sure arguments make sense
  assert_different_registers(obj, var_size_in_bytes, t1, t2);
  assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    ba(slow_case);
  } else {
    // get eden boundaries
    // note: we need both top & top_addr!
    const Register top_addr = t1;
    const Register end      = t2;

    CollectedHeap* ch = Universe::heap();
    set((intx)ch->top_addr(), top_addr);
    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
    ld_ptr(top_addr, delta, end);
    ld_ptr(top_addr, 0, obj);

    // try to allocate
    Label retry;
    bind(retry);
#ifdef ASSERT
    // make sure eden top is properly aligned
    {
      Label L;
      btst(MinObjAlignmentInBytesMask, obj);

    bind(L);
  }
#endif // ASSERT

  // update the tlab top pointer
  st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
  verify_tlab();
}


void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = O0;
  Register t1 = G1;
  Register t2 = G3;
  Register t3 = O1;
  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    ba(slow_case);
  }

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);

  // calculate amount of free space
  sub(t1, top, t1);
  srl_ptr(t1, LogHeapWordSize, t1);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmp(t1, t2);
  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);

  // increment waste limit to prevent getting stuck on this slow path
  delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  if (TLABStats) {
    // increment number of slow_allocations
    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  ba(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage
    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
    add(t2, t1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  br_null(top, false, Assembler::pn, do_refill);

  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
  // set klass to intArrayKlass
  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
  st(t1, top, arrayOopDesc::length_offset_in_bytes());
  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
  ld_ptr(t2, 0, t2);
  // store klass last.  concurrent GCs assume the length is valid if the
  // klass field is not null.
  store_klass(t2, top);
  verify_oop(top);

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
  sub(top, t1, t1); // size of tlab's allocated portion
  incr_allocated_bytes(t1, t2, t3);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
  sll_ptr(t1, LogHeapWordSize, t1);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
#ifdef ASSERT
  // check that tlab_size (t1) is still valid
  {
    Label ok;
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
    sll_ptr(t2, LogHeapWordSize, t2);
    cmp_and_br(t1, t2, Assembler::equal, false, Assembler::pn, ok);
    stop("assert(t1 == tlab_size)");
    should_not_reach_here();

    bind(ok);
  }
#endif // ASSERT
  add(top, t1, top); // t1 is tlab_size
  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab();
  ba(retry);
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                                          Register t1, Register t2) {
  // Bump total bytes allocated by this thread
  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
  assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
  // v8 support has gone the way of the dodo
  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
  add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
}

Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
  // Note some conditions are synonyms for others
  case Assembler::never:     return Assembler::always;
  case Assembler::zero:      return Assembler::notZero;
  case Assembler::lessEqual: return Assembler::greater;
  case Assembler::less:      return Assembler::greaterEqual;
4337 Label refill, restart;
4338 if (with_frame) {
4339 __ save_frame(0);
4340 pre_val = I0; // Was O0 before the save.
4341 } else {
4342 pre_val = O0;
4343 }
4344 int satb_q_index_byte_offset =
4345 in_bytes(JavaThread::satb_mark_queue_offset() +
4346 PtrQueue::byte_offset_of_index());
4347 int satb_q_buf_byte_offset =
4348 in_bytes(JavaThread::satb_mark_queue_offset() +
4349 PtrQueue::byte_offset_of_buf());
4350 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
4351 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
4352 "check sizes in assembly below");
4353
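// For orientation (a sketch of the queue discipline, not new behavior): the
// index counts down in bytes from the buffer capacity toward zero, a slot is
// claimed by "index -= oopSize; buf[index] = pre_val", and index == 0 means
// the buffer is full and must be handed to the refill path below.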
4354 __ bind(restart);
4355 __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
4356
4357 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill, false);
4358 // If the branch is taken, no harm in executing this in the delay slot.
4359 __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
4360 __ sub(L0, oopSize, L0);
4361
4362 __ st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val
4363 if (!with_frame) {
4364 // Use return-from-leaf
4365 __ retl();
4366 __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4367 } else {
4368 // Not delayed.
4369 __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4370 }
4371 if (with_frame) {
4372 __ ret();
4373 __ delayed()->restore();
4374 }
4375 __ bind(refill);
4376
4377 address handle_zero =
4452 assert(pre_val == noreg, "check this code");
4453 }
4454
4455 // Is marking active?
4456 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4457 ld(G2,
4458 in_bytes(JavaThread::satb_mark_queue_offset() +
4459 PtrQueue::byte_offset_of_active()),
4460 tmp);
4461 } else {
4462 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
4463 "Assumption");
4464 ldsb(G2,
4465 in_bytes(JavaThread::satb_mark_queue_offset() +
4466 PtrQueue::byte_offset_of_active()),
4467 tmp);
4468 }
4469
4470 // Check on whether to annul.
4471 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
4472
4473 // Do we need to load the previous value?
4474 if (obj != noreg) {
4475 // Load the previous value...
4476 if (index == noreg) {
4477 if (Assembler::is_simm13(offset)) {
4478 load_heap_oop(obj, offset, tmp);
4479 } else {
4480 set(offset, tmp);
4481 load_heap_oop(obj, tmp, tmp);
4482 }
4483 } else {
4484 load_heap_oop(obj, index, tmp);
4485 }
4486 // Previous value has been loaded into tmp
4487 pre_val = tmp;
4488 }
4489
4490 assert(pre_val != noreg, "must have a real register");
4491
4492 // Is the previous value null?
4493 // Check on whether to annul.
4494 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
4495
4496 // OK, it's not filtered, so we'll need to call enqueue. In the normal
4497 // case, pre_val will be a scratch G-reg, but there are some cases in
4498 // which it's an O-reg. In the former case, do a normal call; in the
4499 // latter, do a save here and call the frameless version.
4500
4501 guarantee(pre_val->is_global() || pre_val->is_out(),
4502 "Or we need to think harder.");
4503
4504 if (pre_val->is_global() && !preserve_o_regs) {
4505 generate_satb_log_enqueue_if_necessary(true); // with frame
4506
4507 call(satb_log_enqueue_with_frame);
4508 delayed()->mov(pre_val, O0);
4509 } else {
4510 generate_satb_log_enqueue_if_necessary(false); // frameless
4511
4512 save_frame(0);
4513 call(satb_log_enqueue_frameless);
4514 delayed()->mov(pre_val->after_save(), O0);
978 // %%% maybe get rid of [re]set_last_Java_frame
979 void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
980 assert_not_delayed();
981 Address flags(G2_thread, JavaThread::frame_anchor_offset() +
982 JavaFrameAnchor::flags_offset());
983 Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
984
985 // Always set last_Java_pc and flags first because once last_Java_sp is visible,
986 // has_last_Java_frame is true and users will look at the rest of the fields.
987 // (Note: flags should always be zero before we get here, so it doesn't need to be set.)
988
989 #ifdef ASSERT
990 // Verify that last_Java_pc was zeroed on return to Java
991 Label PcOk;
992 save_frame(0); // to avoid clobbering O0
993 ld_ptr(pc_addr, L0);
994 br_null_short(L0, Assembler::pt, PcOk);
995 stop("last_Java_pc not zeroed before leaving Java");
996 bind(PcOk);
997
998 // Verify that flags was zeroed on return to Java
999 Label FlagsOk;
1000 ld(flags, L0);
1001 tst(L0);
1002 br(Assembler::zero, false, Assembler::pt, FlagsOk);
1003 delayed() -> restore();
1004 stop("flags not zeroed before leaving Java");
1005 bind(FlagsOk);
1006 #endif /* ASSERT */
1007 //
1008 // When returning from calling out from Java mode the frame anchor's last_Java_pc
1009 // will always be set to NULL. It is set here so that if we are doing a call to
1010 // native (not VM) code we capture the known pc and don't have to rely on the
1011 // native call having a standard frame linkage where we can find the pc.
1012
1013 if (last_Java_pc->is_valid()) {
1014 st_ptr(last_Java_pc, pc_addr);
1099 set(badHeapWordVal, G3);
1100 set(badHeapWordVal, G4);
1101 set(badHeapWordVal, G5);
1102 #endif
1103
1104 // get oop result if there is one and reset the value in the thread
1105 if (oop_result->is_valid()) {
1106 get_vm_result(oop_result);
1107 }
1108 }
1109
1110 void MacroAssembler::check_and_forward_exception(Register scratch_reg)
1111 {
1112 Label L;
1113
1114 check_and_handle_popframe(scratch_reg);
1115 check_and_handle_earlyret(scratch_reg);
1116
1117 Address exception_addr(G2_thread, Thread::pending_exception_offset());
1118 ld_ptr(exception_addr, scratch_reg);
1119 br_null_short(scratch_reg, pt, L);
1120 // we use O7 linkage so that forward_exception_entry has the issuing PC
1121 call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
1122 delayed()->nop();
1123 bind(L);
1124 }
1125
1126
1127 void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
1128 }
1129
1130
1131 void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
1132 }
1133
1134
1135 void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1136 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
1137 }
1138
1139
1873 Register O2_adr = O2;
1874 Register O3_accum = O3;
1875 inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
1876 }
1877
1878 Register O2_mask = O2;
1879 Register O3_bits = O3;
1880 Register O4_temp = O4;
1881
1882 // mark lower end of faulting range
1883 assert(_verify_oop_implicit_branch[0] == NULL, "set once");
1884 _verify_oop_implicit_branch[0] = pc();
1885
1886 // We can't check the mark oop because it could be in the process of
1887 // locking or unlocking while this is running.
1888 set(Universe::verify_oop_mask (), O2_mask);
1889 set(Universe::verify_oop_bits (), O3_bits);
1890
1891 // assert((obj & oop_mask) == oop_bits);
1892 and3(O0_obj, O2_mask, O4_temp);
1893 cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
1894
1895 if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
1896 // the null_or_fail case is useless; must test for null separately
1897 br_null_short(O0_obj, pn, succeed);
1898 }
1899
1900 // Check the klassOop of this object for being in the right area of memory.
1901 // Cannot do the load in the delay slot above in case O0 is null
1902 load_klass(O0_obj, O0_obj);
1903 // assert((klass & klass_mask) == klass_bits);
1904 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
1905 set(Universe::verify_klass_mask(), O2_mask);
1906 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
1907 set(Universe::verify_klass_bits(), O3_bits);
1908 and3(O0_obj, O2_mask, O4_temp);
1909 cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
1910 // Check the klass's klass
1911 load_klass(O0_obj, O0_obj);
1912 and3(O0_obj, O2_mask, O4_temp);
1913 cmp(O4_temp, O3_bits);
1914 brx(notEqual, false, pn, fail);
1915 delayed()->wrccr( O5_save_flags ); // Restore CCR's
1916
1917 // mark upper end of faulting range
1918 _verify_oop_implicit_branch[1] = pc();
1919
1920 //-----------------------
1921 // all tests pass
1922 bind(succeed);
1923
1924 // Restore prior 64-bit registers
1925 ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
1926 ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
1927 ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
1928 ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
1929 ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);
2117 }
2118
2119
2120 // ---------------------------------------------------------
2121 Assembler::RCondition cond2rcond(Assembler::Condition c) {
2122 switch (c) {
2123 /*case zero: */
2124 case Assembler::equal: return Assembler::rc_z;
2125 case Assembler::lessEqual: return Assembler::rc_lez;
2126 case Assembler::less: return Assembler::rc_lz;
2127 /*case notZero:*/
2128 case Assembler::notEqual: return Assembler::rc_nz;
2129 case Assembler::greater: return Assembler::rc_gz;
2130 case Assembler::greaterEqual: return Assembler::rc_gez;
2131 }
2132 ShouldNotReachHere();
2133 return Assembler::rc_z;
2134 }
2135
2136 // Compares a (32-bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS
2137 void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
2138 tst(s1);
2139 br (c, a, p, L);
2140 }
2141
2142 // Compares a pointer register with zero and branches on null.
2143 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
2144 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
2145 assert_not_delayed();
2146 if (emit_delayed_nop && use_cbcond(L)) {
2147 Assembler::cbcond(zero, ptr_cc, s1, 0, L);
2148 return;
2149 }
2150 #ifdef _LP64
2151 bpr( rc_z, a, p, s1, L );
2152 #else
2153 tst(s1);
2154 br ( zero, a, p, L );
2155 #endif
2156 // Some callers can fill the delay slot.
2157 if (emit_delayed_nop) {
2158 delayed()->nop();
2159 }
2205 }
2206 // Some callers can fill the delay slot.
2207 if (emit_delayed_nop) {
2208 delayed()->nop();
2209 }
2210 }
2211
2212 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
2213 void MacroAssembler::cmp_and_br(Register s1, Register s2, Condition c,
2214 bool a, Predict p, Label& L) {
2215 assert_not_delayed();
2216 if (use_cbcond(L)) {
2217 Assembler::cbcond(c, icc, s1, s2, L);
2218 } else {
2219 cmp(s1, s2);
2220 br(c, a, p, L);
2221 delayed()->nop();
2222 }
2223 }
2224
2225 // Compare integer (32 bit) values (icc only).
2226 void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
2227 Predict p, Label& L) {
2228 assert_not_delayed();
2229 if (use_cbcond(L)) {
2230 Assembler::cbcond(c, icc, s1, s2, L);
2231 } else {
2232 cmp(s1, s2);
2233 br(c, false, p, L);
2234 delayed()->nop();
2235 }
2236 }
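// Note on the immediate variants below: cbcond can only encode a 5-bit signed
// immediate (hence the is_simm(simm13a, 5) guard), while the cmp fallback
// accepts the full 13-bit range; when the guard fails we emit the classic
// three-instruction sequence (cmp; branch; delayed nop) instead.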
2237
2238 // Compare integer (32 bit) values (icc only).
2239 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
2240 Predict p, Label& L) {
2241 assert_not_delayed();
2242 if (is_simm(simm13a,5) && use_cbcond(L)) {
2243 Assembler::cbcond(c, icc, s1, simm13a, L);
2244 } else {
2245 cmp(s1, simm13a);
2246 br(c, false, p, L);
2247 delayed()->nop();
2248 }
2249 }
2250
2251 // Branch that tests xcc in LP64 and icc in !LP64
2252 void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
2253 Predict p, Label& L) {
2254 assert_not_delayed();
2255 if (use_cbcond(L)) {
2256 Assembler::cbcond(c, ptr_cc, s1, s2, L);
2257 } else {
2258 cmp(s1, s2);
2259 brx(c, false, p, L);
2260 delayed()->nop();
2261 }
2262 }
2263
2264 // Branch that tests xcc in LP64 and icc in !LP64
2265 void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
2266 Predict p, Label& L) {
2267 assert_not_delayed();
2268 if (is_simm(simm13a,5) && use_cbcond(L)) {
2269 Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
2270 } else {
2271 cmp(s1, simm13a);
2272 brx(c, false, p, L);
2273 delayed()->nop();
2274 }
2275 }
2276
2277 // Short branch version for compares a pointer with zero.
2278
2279 void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
2280 assert_not_delayed();
2281 if (use_cbcond(L)) {
2282 Assembler::cbcond(zero, ptr_cc, s1, 0, L);
2283 return;
2284 }
2285 br_null(s1, false, p, L);
2286 delayed()->nop();
2287 }
2288
2289 void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
2290 assert_not_delayed();
2291 if (use_cbcond(L)) {
2292 Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
2293 return;
2294 }
2295 br_notnull(s1, false, p, L);
2296 delayed()->nop();
2297 }
2298
2299 // Unconditional short branch
2300 void MacroAssembler::ba_short(Label& L) {
2301 if (use_cbcond(L)) {
2302 Assembler::cbcond(equal, icc, G0, G0, L);
2303 return;
2304 }
2305 br(always, false, pt, L);
2306 delayed()->nop();
2307 }
2308
2309 // instruction sequences factored across compiler & interpreter
2310
2311
2312 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
2313 Register Rb_hi, Register Rb_low,
2314 Register Rresult) {
2315
2316 Label check_low_parts, done;
2317
2318 cmp(Ra_hi, Rb_hi ); // compare hi parts
2319 br(equal, true, pt, check_low_parts);
2320 delayed()->cmp(Ra_low, Rb_low); // test low parts
2321
2322 // And, with an unsigned comparison, it does not matter if the numbers
2323 // are negative or not.
2324 // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
2325 // The second one is bigger (unsignedly).
2326
2327 // Other notes: The first move in each triplet can be unconditional
2328 // (and therefore probably prefetchable).
2329 // And the equals case for the high part does not need testing,
2330 // since that triplet is reached only after finding the high halves differ.
2331
2332 if (VM_Version::v9_instructions_work()) {
2333 mov(-1, Rresult);
2334 ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult);
2335 } else {
2336 br(less, true, pt, done); delayed()-> set(-1, Rresult);
2337 br(greater, true, pt, done); delayed()-> set( 1, Rresult);
2338 }
2339
2340 bind( check_low_parts );
2341
2342 if (VM_Version::v9_instructions_work()) {
2343 mov( -1, Rresult);
2344 movcc(equal, false, icc, 0, Rresult);
2345 movcc(greaterUnsigned, false, icc, 1, Rresult);
2346 } else {
2347 set(-1, Rresult);
2348 br(equal, true, pt, done); delayed()->set( 0, Rresult);
2349 br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
2350 }
2351 bind( done );
2352 }
2353
2354 void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
2389
2390 // We get the transfer bits by shifting right by 32-count the low
2391 // register. This is done by shifting right by 31-count and then by one
2392 // more to take care of the special (rare) case where count is zero
2393 // (shifting by 32 would not work).
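// Worked example: for count == 0 the transfer bits should be Rin_low >> 32 == 0,
// but srl takes its shift amount mod 32, so a single shift by 32 would shift by
// nothing and leave all of Rin_low in the transfer bits. Shifting by 31 and then
// by 1 performs the full 32-bit shift in two architecturally legal steps.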
2394
2395 neg(Ralt_count);
2396
2397 // The order of the next two instructions is critical in the case where
2398 // Rin and Rout are the same and should not be reversed.
2399
2400 srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
2401 if (Rcount != Rout_low) {
2402 sll(Rin_low, Rcount, Rout_low); // low half
2403 }
2404 sll(Rin_high, Rcount, Rout_high);
2405 if (Rcount == Rout_low) {
2406 sll(Rin_low, Rcount, Rout_low); // low half
2407 }
2408 srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
2409 ba(done);
2410 delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low
2411
2412 // shift >= 32 bits, Ralt_count = Rcount-32
2413 bind(big_shift);
2414 sll(Rin_low, Ralt_count, Rout_high );
2415 clr(Rout_low);
2416
2417 bind(done);
2418 }
2419
2420
2421 void MacroAssembler::lshr( Register Rin_high, Register Rin_low,
2422 Register Rcount,
2423 Register Rout_high, Register Rout_low,
2424 Register Rtemp ) {
2425
2426 Register Ralt_count = Rtemp;
2427 Register Rxfer_bits = Rtemp;
2428
2429 assert( Ralt_count != Rin_high
2450
2451 // We get the transfer bits by shifting left by 32-count the high
2452 // register. This is done by shifting left by 31-count and then by one
2453 // more to take care of the special (rare) case where count is zero
2454 // (shifting by 32 would not work).
2455
2456 neg(Ralt_count);
2457 if (Rcount != Rout_low) {
2458 srl(Rin_low, Rcount, Rout_low);
2459 }
2460
2461 // The order of the next two instructions is critical in the case where
2462 // Rin and Rout are the same and should not be reversed.
2463
2464 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2465 sra(Rin_high, Rcount, Rout_high ); // high half
2466 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
2467 if (Rcount == Rout_low) {
2468 srl(Rin_low, Rcount, Rout_low);
2469 }
2470 ba(done);
2471 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2472
2473 // shift >= 32 bits, Ralt_count = Rcount-32
2474 bind(big_shift);
2475
2476 sra(Rin_high, Ralt_count, Rout_low);
2477 sra(Rin_high, 31, Rout_high); // sign into hi
2478
2479 bind( done );
2480 }
2481
2482
2483
2484 void MacroAssembler::lushr( Register Rin_high, Register Rin_low,
2485 Register Rcount,
2486 Register Rout_high, Register Rout_low,
2487 Register Rtemp ) {
2488
2489 Register Ralt_count = Rtemp;
2490 Register Rxfer_bits = Rtemp;
2513
2514 // We get the transfer bits by shifting left by 32-count the high
2515 // register. This is done by shifting left by 31-count and then by one
2516 // more to take care of the special (rare) case where count is zero
2517 // (shifting by 32 would not work).
2518
2519 neg(Ralt_count);
2520 if (Rcount != Rout_low) {
2521 srl(Rin_low, Rcount, Rout_low);
2522 }
2523
2524 // The order of the next two instructions is critical in the case where
2525 // Rin and Rout are the same and should not be reversed.
2526
2527 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
2528 srl(Rin_high, Rcount, Rout_high ); // high half
2529 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
2530 if (Rcount == Rout_low) {
2531 srl(Rin_low, Rcount, Rout_low);
2532 }
2533 ba(done);
2534 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
2535
2536 // shift >= 32 bits, Ralt_count = Rcount-32
2537 bind(big_shift);
2538
2539 srl(Rin_high, Ralt_count, Rout_low);
2540 clr(Rout_high);
2541
2542 bind( done );
2543 }
2544
2545 #ifdef _LP64
2546 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
2547 cmp(Ra, Rb);
2548 mov(-1, Rresult);
2549 movcc(equal, false, xcc, 0, Rresult);
2550 movcc(greater, false, xcc, 1, Rresult);
2551 }
2552 #endif
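// The branch-free idiom above computes the signum of the comparison:
// mov(-1, Rresult) assumes Ra < Rb, then movcc(equal, ...) overwrites with 0
// when Ra == Rb and movcc(greater, ...) overwrites with 1 when Ra > Rb.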
2553
2777 if (top_reg_after_save == L1) {
2778 ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
2779 }
2780
2781 if (ptr_reg_after_save == L2) {
2782 ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
2783 }
2784
2785 Label retry_get_lock;
2786 Label not_same;
2787 Label dont_yield;
2788
2789 assert(lock_addr, "lock_address should be non null for v8");
2790 set((intptr_t)lock_addr, lock_ptr_reg);
2791 // Initialize yield counter
2792 mov(G0,yield_reg);
2793 mov(G0, yieldall_reg);
2794 set(StubRoutines::Sparc::locked, lock_reg);
2795
2796 bind(retry_get_lock);
2797 cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
2798
2799 if(use_call_vm) {
2800 Untested("Need to verify global reg consistency");
2801 call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
2802 } else {
2803 // Save the regs and make space for a C call
2804 save(SP, -96, SP);
2805 save_all_globals_into_locals();
2806 call(CAST_FROM_FN_PTR(address,os::yield_all));
2807 delayed()->mov(yieldall_reg, O0);
2808 restore_globals_from_locals();
2809 restore();
2810 }
2811
2812 // reset the counter
2813 mov(G0,yield_reg);
2814 add(yieldall_reg, 1, yieldall_reg);
2815
2816 bind(dont_yield);
2817 // try to get lock
2818 swap(lock_ptr_reg, 0, lock_reg);
2819
2820 // did we get the lock?
2821 cmp(lock_reg, StubRoutines::Sparc::unlocked);
2822 br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
2823 delayed()->add(yield_reg,1,yield_reg);
2824
2825 // yes, got lock. do we have the same top?
2826 ld(top_ptr_reg_after_save, 0, value_reg);
2827 cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
2828
2829 // yes, same top.
2830 st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
2831 membar(Assembler::StoreStore);
2832
2833 bind(not_same);
2834 mov(value_reg, ptr_reg_after_save);
2835 st(lock_reg, lock_ptr_reg, 0); // unlock
2836
2837 restore();
2838 }
2839 }
2840
2841 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2842 Register tmp,
2843 int offset) {
2844 intptr_t value = *delayed_value_addr;
2845 if (value != 0)
2846 return RegisterOrConstant(value + offset);
2847
3057 Register super_klass,
3058 Register temp_reg,
3059 Register temp2_reg,
3060 Label& L_success) {
3061 Label L_failure, L_pop_to_failure;
3062 check_klass_subtype_fast_path(sub_klass, super_klass,
3063 temp_reg, temp2_reg,
3064 &L_success, &L_failure, NULL);
3065 Register sub_2 = sub_klass;
3066 Register sup_2 = super_klass;
3067 if (!sub_2->is_global()) sub_2 = L0;
3068 if (!sup_2->is_global()) sup_2 = L1;
3069
3070 save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
3071 check_klass_subtype_slow_path(sub_2, sup_2,
3072 L2, L3, L4, L5,
3073 NULL, &L_pop_to_failure);
3074
3075 // on success:
3076 restore();
3077 ba_short(L_success);
3078
3079 // on failure:
3080 bind(L_pop_to_failure);
3081 restore();
3082 bind(L_failure);
3083 }
3084
3085
3086 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3087 Register super_klass,
3088 Register temp_reg,
3089 Register temp2_reg,
3090 Label* L_success,
3091 Label* L_failure,
3092 Label* L_slow_path,
3093 RegisterOrConstant super_check_offset) {
3094 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
3095 Klass::secondary_super_cache_offset_in_bytes());
3096 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
3097 Klass::super_check_offset_offset_in_bytes());
3136 // super_check_offset is register.
3137 assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
3138 }
3139 ld_ptr(sub_klass, super_check_offset, temp_reg);
3140 cmp(super_klass, temp_reg);
3141
3142 // This check has worked decisively for primary supers.
3143 // Secondary supers are sought in the super_cache ('super_cache_addr').
3144 // (Secondary supers are interfaces and very deeply nested subtypes.)
3145 // This works in the same check above because of a tricky aliasing
3146 // between the super_cache and the primary super display elements.
3147 // (The 'super_check_addr' can address either, as the case requires.)
3148 // Note that the cache is updated below if it does not help us find
3149 // what we need immediately.
3150 // So if it was a primary super, we can just fail immediately.
3151 // Otherwise, it's the slow path for us (no success at this point).
3152
3153 // Hacked ba(), which may only be used just before L_fallthrough.
3154 #define FINAL_JUMP(label) \
3155 if (&(label) != &L_fallthrough) { \
3156 ba(label); delayed()->nop(); \
3157 }
3158
3159 if (super_check_offset.is_register()) {
3160 brx(Assembler::equal, false, Assembler::pn, *L_success);
3161 delayed()->cmp(super_check_offset.as_register(), sc_offset);
3162
3163 if (L_failure == &L_fallthrough) {
3164 brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
3165 delayed()->nop();
3166 } else {
3167 brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
3168 delayed()->nop();
3169 FINAL_JUMP(*L_slow_path);
3170 }
3171 } else if (super_check_offset.as_constant() == sc_offset) {
3172 // Need a slow path; fast failure is impossible.
3173 if (L_slow_path == &L_fallthrough) {
3174 brx(Assembler::equal, false, Assembler::pt, *L_success);
3175 delayed()->nop();
3176 } else {
3268 // Don't use load_heap_oop; we don't want to decode the element.
3269 lduw( scan_temp, elem_offset, scratch_reg );
3270 } else {
3271 ld_ptr( scan_temp, elem_offset, scratch_reg );
3272 }
3273
3274 // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
3275 cmp(scratch_reg, search_key);
3276
3277 // A miss means we are NOT a subtype and need to keep looping
3278 brx(Assembler::notEqual, false, Assembler::pn, L_loop);
3279 delayed()->deccc(count_temp); // decrement trip counter in delay slot
3280
3281 // Falling out the bottom means we found a hit; we ARE a subtype
3282 if (decode_super_klass) decode_heap_oop(super_klass);
3283
3284 // Success. Cache the super we found and proceed in triumph.
3285 st_ptr(super_klass, sub_klass, sc_offset);
3286
3287 if (L_success != &L_fallthrough) {
3288 ba(*L_success);
3289 delayed()->nop();
3290 }
3291
3292 bind(L_fallthrough);
3293 }
3294
3295
3296 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
3297 Register temp_reg,
3298 Label& wrong_method_type) {
3299 assert_different_registers(mtype_reg, mh_reg, temp_reg);
3300 // compare method type against that of the receiver
3301 RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
3302 load_heap_oop(mh_reg, mhtype_offset, temp_reg);
3303 cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
3304 }
3305
3306
3307 // A method handle has a "vmslots" field which gives the size of its
3308 // argument list in JVM stack slots. This field is either located directly
3309 // in every method handle, or else is indirectly accessed through the
3310 // method handle's MethodType. This macro hides the distinction.
3311 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
3312 Register temp_reg) {
3313 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
3314 // load mh.type.form.vmslots
3315 if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
3316 // hoist vmslots into every mh to avoid dependent load chain
3317 ld( Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3318 } else {
3319 Register temp2_reg = vmslots_reg;
3320 load_heap_oop(Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)), temp2_reg);
3321 load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)), temp2_reg);
3322 ld( Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3323 }
3376 Register temp_reg,
3377 Label& done, Label* slow_case,
3378 BiasedLockingCounters* counters) {
3379 assert(UseBiasedLocking, "why call this otherwise?");
3380
3381 if (PrintBiasedLockingStatistics) {
3382 assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
3383 if (counters == NULL)
3384 counters = BiasedLocking::counters();
3385 }
3386
3387 Label cas_label;
3388
3389 // Biased locking
3390 // See whether the lock is currently biased toward our thread and
3391 // whether the epoch is still valid
3392 // Note that the runtime guarantees sufficient alignment of JavaThread
3393 // pointers to allow age to be placed into low bits
3394 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
3395 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3396 cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
3397
3398 load_klass(obj_reg, temp_reg);
3399 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3400 or3(G2_thread, temp_reg, temp_reg);
3401 xor3(mark_reg, temp_reg, temp_reg);
3402 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
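// After the or3/xor3 above, temp_reg is zero (ignoring the age bits, which the
// andcc masks off) exactly when the mark carries the bias pattern, the current
// epoch, and our own thread as bias owner -- i.e. we already hold the bias.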
3403 if (counters != NULL) {
3404 cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
3405 // Reload mark_reg as we may need it later
3406 ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
3407 }
3408 brx(Assembler::equal, true, Assembler::pt, done);
3409 delayed()->nop();
3410
3411 Label try_revoke_bias;
3412 Label try_rebias;
3413 Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
3414 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3415
3416 // At this point we know that the header has the bias pattern and
3443 // fails we will go into the runtime to revoke the object's bias.
3444 // Note that we first construct the presumed unbiased header so we
3445 // don't accidentally blow away another thread's valid bias.
3446 delayed()->and3(mark_reg,
3447 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
3448 mark_reg);
3449 or3(G2_thread, mark_reg, temp_reg);
3450 casn(mark_addr.base(), mark_reg, temp_reg);
3451 // If the biasing toward our thread failed, this means that
3452 // another thread succeeded in biasing it toward itself and we
3453 // need to revoke that bias. The revocation will occur in the
3454 // interpreter runtime in the slow case.
3455 cmp(mark_reg, temp_reg);
3456 if (counters != NULL) {
3457 cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
3458 }
3459 if (slow_case != NULL) {
3460 brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3461 delayed()->nop();
3462 }
3463 ba_short(done);
3464
3465 bind(try_rebias);
3466 // At this point we know the epoch has expired, meaning that the
3467 // current "bias owner", if any, is actually invalid. Under these
3468 // circumstances _only_, we are allowed to use the current header's
3469 // value as the comparison value when doing the cas to acquire the
3470 // bias in the current epoch. In other words, we allow transfer of
3471 // the bias from one thread to another directly in this situation.
3472 //
3473 // FIXME: due to a lack of registers we currently blow away the age
3474 // bits in this situation. Should attempt to preserve them.
3475 load_klass(obj_reg, temp_reg);
3476 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3477 or3(G2_thread, temp_reg, temp_reg);
3478 casn(mark_addr.base(), mark_reg, temp_reg);
3479 // If the biasing toward our thread failed, this means that
3480 // another thread succeeded in biasing it toward itself and we
3481 // need to revoke that bias. The revocation will occur in the
3482 // interpreter runtime in the slow case.
3483 cmp(mark_reg, temp_reg);
3484 if (counters != NULL) {
3485 cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
3486 }
3487 if (slow_case != NULL) {
3488 brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3489 delayed()->nop();
3490 }
3491 ba_short(done);
3492
3493 bind(try_revoke_bias);
3494 // The prototype mark in the klass doesn't have the bias bit set any
3495 // more, indicating that objects of this data type are not supposed
3496 // to be biased any more. We are going to try to reset the mark of
3497 // this object to the prototype value and fall through to the
3498 // CAS-based locking scheme. Note that if our CAS fails, it means
3499 // that another thread raced us for the privilege of revoking the
3500 // bias of this particular object, so it's okay to continue in the
3501 // normal locking code.
3502 //
3503 // FIXME: due to a lack of registers we currently blow away the age
3504 // bits in this situation. Should attempt to preserve them.
3505 load_klass(obj_reg, temp_reg);
3506 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3507 casn(mark_addr.base(), mark_reg, temp_reg);
3508 // Fall through to the normal CAS-based lock, because no matter what
3509 // the result of the above CAS, some thread must have succeeded in
3510 // removing the bias bit from the object's header.
3511 if (counters != NULL) {
3522 // Note: we do not have to check the thread ID for two reasons.
3523 // First, the interpreter checks for IllegalMonitorStateException at
3524 // a higher level. Second, if the bias was revoked while we held the
3525 // lock, the object could not be rebiased toward another thread, so
3526 // the bias bit would be clear.
3527 ld_ptr(mark_addr, temp_reg);
3528 and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3529 cmp(temp_reg, markOopDesc::biased_lock_pattern);
3530 brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
3531 delayed();
3532 if (!allow_delay_slot_filling) {
3533 nop();
3534 }
3535 }
3536
3537
3538 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
3539 // Solaris/SPARC's "as". Another apt name would be cas_ptr()
3540
3541 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
3542 casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3543 }
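// For reference: on v9 hardware casx_under_lock reduces to a plain atomic casx
// and the lock address goes unused; the software lock only matters on pre-v9
// parts that lack a compare-and-swap instruction.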
3544
3545
3546
3547 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
3548 // of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments.
3549 // The code could be tightened up considerably.
3550 //
3551 // box->dhw disposition - post-conditions at DONE_LABEL.
3552 // - Successful inflated lock: box->dhw != 0.
3553 // Any non-zero value suffices.
3554 // Consider G2_thread, rsp, boxReg, or unused_mark()
3555 // - Successful Stack-lock: box->dhw == mark.
3556 // box->dhw must contain the displaced mark word value
3557 // - Failure -- icc.ZFlag == 0 and box->dhw is undefined.
3558 // The slow-path fast_enter() and slow_enter() operators
3559 // are responsible for setting box->dhw = NonZero (typically ::unused_mark).
3560 // - Biased: box->dhw is undefined
3561 //
3562 // SPARC refworkload performance - specifically jetstream and scimark - is
3563 // extremely sensitive to the size of the code emitted by compiler_lock_object
3564 // and compiler_unlock_object. Critically, the key factor is code size, not path
3565 // length. (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
3566 // effect.)
3567
3568
3569 void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
3570 Register Rbox, Register Rscratch,
3571 BiasedLockingCounters* counters,
3572 bool try_bias) {
3573 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3574
3575 verify_oop(Roop);
3576 Label done ;
3577
3578 if (counters != NULL) {
3579 inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
3580 }
3581
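// EmitSync is a diagnostic bit-mask flag: individual bits select the alternate
// (mostly degenerate) emission strategies below so the synchronization paths
// can be isolated while debugging; the default is EmitSync == 0.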
3582 if (EmitSync & 1) {
3583 mov(3, Rscratch);
3584 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3585 cmp(SP, G0);
3586 return ;
3587 }
3588
3589 if (EmitSync & 2) {
3590
3591 // Fetch object's markword
3592 ld_ptr(mark_addr, Rmark);
3593
3594 if (try_bias) {
3595 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3596 }
3597
3598 // Save Rbox in Rscratch to be used for the cas operation
3599 mov(Rbox, Rscratch);
3600
3601 // set Rmark to markOop | markOopDesc::unlocked_value
3602 or3(Rmark, markOopDesc::unlocked_value, Rmark);
3603
3604 // Initialize the box. (Must happen before we update the object mark!)
3605 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3606
3607 // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
3608 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3609 casx_under_lock(mark_addr.base(), Rmark, Rscratch,
3610 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3611
3612 // if compare/exchange succeeded we found an unlocked object and we now have locked it
3613 // hence we are done
3614 cmp(Rmark, Rscratch);
3615 #ifdef _LP64
3616 sub(Rscratch, STACK_BIAS, Rscratch);
3617 #endif
3618 brx(Assembler::equal, false, Assembler::pt, done);
3619 delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
3620
3621 // we did not find an unlocked object so see if this is a recursive case
3622 // sub(Rscratch, SP, Rscratch);
3623 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3624 andcc(Rscratch, 0xfffff003, Rscratch);
3625 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3626 bind (done);
3627 return ;
3628 }
3629
3630 Label Egress ;
3631
3632 if (EmitSync & 256) {
3633 Label IsInflated ;
3634
3635 ld_ptr(mark_addr, Rmark); // fetch obj->mark
3636 // Triage: biased, stack-locked, neutral, inflated
3637 if (try_bias) {
3638 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3639 // Invariant: if control reaches this point in the emitted stream
3640 // then Rmark has not been modified.
3641 }
3642
3643 // Store mark into displaced mark field in the on-stack basic-lock "box"
3644 // Critically, this must happen before the CAS
3645 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
3646 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3647 andcc(Rmark, 2, G0);
3648 brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3649 delayed()->
3650
3651 // Try stack-lock acquisition.
3652 // Beware: the 1st instruction is in a delay slot
3653 mov(Rbox, Rscratch);
3654 or3(Rmark, markOopDesc::unlocked_value, Rmark);
3655 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3656 casn(mark_addr.base(), Rmark, Rscratch);
3657 cmp(Rmark, Rscratch);
3658 brx(Assembler::equal, false, Assembler::pt, done);
3659 delayed()->sub(Rscratch, SP, Rscratch);
3660
3661 // Stack-lock attempt failed - check for recursive stack-lock.
3662 // See the comments below about how we might remove this case.
3663 #ifdef _LP64
3664 sub(Rscratch, STACK_BIAS, Rscratch);
3665 #endif
3666 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3667 andcc(Rscratch, 0xfffff003, Rscratch);
3668 br(Assembler::always, false, Assembler::pt, done);
3669 delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3670
3671 bind(IsInflated);
3672 if (EmitSync & 64) {
3673 // If m->owner != null goto IsLocked
3674 // Pessimistic form: Test-and-CAS vs CAS
3675 // The optimistic form avoids RTS->RTO cache line upgrades.
3676 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3677 andcc(Rscratch, Rscratch, G0);
3678 brx(Assembler::notZero, false, Assembler::pn, done);
3679 delayed()->nop();
3680 // m->owner == null : it's unlocked.
3681 }
3682
3683 // Try to CAS m->owner from null to Self
3684 // Invariant: if we acquire the lock then _recursions should be 0.
3685 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3686 mov(G2_thread, Rscratch);
3687 casn(Rmark, G0, Rscratch);
3688 cmp(Rscratch, G0);
3689 // Intentional fall-through into done
3690 } else {
3691 // Aggressively avoid the Store-before-CAS penalty
3692 // Defer the store into box->dhw until after the CAS
3693 Label IsInflated, Recursive ;
3694
3695 // Anticipate CAS -- Avoid RTS->RTO upgrade
3696 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3697
3698 ld_ptr(mark_addr, Rmark); // fetch obj->mark
3699 // Triage: biased, stack-locked, neutral, inflated
3700
3701 if (try_bias) {
3702 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
3703 // Invariant: if control reaches this point in the emitted stream
3704 // then Rmark has not been modified.
3705 }
3706 andcc(Rmark, 2, G0);
3707 brx(Assembler::notZero, false, Assembler::pn, IsInflated);
3708 delayed()-> // Beware - dangling delay-slot
3709
3710 // Try stack-lock acquisition.
3711 // Transiently install BUSY (0) encoding in the mark word.
3712 // if the CAS of 0 into the mark was successful then we execute:
3713 // ST box->dhw = mark -- save fetched mark in on-stack basiclock box
3714 // ST obj->mark = box -- overwrite transient 0 value
3715 // This presumes TSO, of course.
3716
3717 mov(0, Rscratch);
3718 or3(Rmark, markOopDesc::unlocked_value, Rmark);
3719 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3720 casn(mark_addr.base(), Rmark, Rscratch);
3721 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
3722 cmp(Rscratch, Rmark);
3723 brx(Assembler::notZero, false, Assembler::pn, Recursive);
3724 delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
3725 if (counters != NULL) {
3726 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3727 }
3728 ba(done);
3729 delayed()->st_ptr(Rbox, mark_addr);
3730
3731 bind(Recursive);
3732 // Stack-lock attempt failed - check for recursive stack-lock.
3733 // Tests show that we can remove the recursive case with no impact
3734 // on refworkload 0.83. If we need to reduce the size of the code
3735 // emitted by compiler_lock_object(), the recursive case is a perfect
3736 // candidate.
3737 //
3738 // A more extreme idea is to always inflate on stack-lock recursion.
3739 // This lets us eliminate the recursive checks in compiler_lock_object
3740 // and compiler_unlock_object and the (box->dhw == 0) encoding.
3741 // A brief experiment - requiring changes to synchronizer.cpp and the
3742 // interpreter - showed a performance *increase*. In the same experiment I eliminated
3743 // the fast-path stack-lock code from the interpreter and always passed
3744 // control to the "slow" operators in synchronizer.cpp.
3745
3746 // Rscratch contains the fetched obj->mark value from the failed CASN.
3747 #ifdef _LP64
3748 sub(Rscratch, STACK_BIAS, Rscratch);
3749 #endif
3750 sub(Rscratch, SP, Rscratch);
3751 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3752 andcc(Rscratch, 0xfffff003, Rscratch);
3753 if (counters != NULL) {
3754 // Accounting needs the Rscratch register
3755 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3756 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3757 ba_short(done);
3758 } else {
3759 ba(done);
3760 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3761 }
3762
3763 bind (IsInflated);
3764 if (EmitSync & 64) {
3765 // If m->owner != null goto IsLocked
3766 // Test-and-CAS vs CAS
3767 // Pessimistic form avoids futile (doomed) CAS attempts
3768 // The optimistic form avoids RTS->RTO cache line upgrades.
3769 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3770 andcc(Rscratch, Rscratch, G0);
3771 brx(Assembler::notZero, false, Assembler::pn, done);
3772 delayed()->nop();
3773 // m->owner == null : it's unlocked.
3774 }
3775
3776 // Try to CAS m->owner from null to Self
3777 // Invariant: if we acquire the lock then _recursions should be 0.
3778 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3779 mov(G2_thread, Rscratch);
3780 casn(Rmark, G0, Rscratch);
3781 cmp(Rscratch, G0);
3782 // ST box->displaced_header = NonZero.
3783 // Any non-zero value suffices:
3784 // unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
3785 st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
3786 // Intentional fall-through into done
3787 }
3788
3789 bind (done);
3790 }
3791
3792 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
3793 Register Rbox, Register Rscratch,
3794 bool try_bias) {
3795 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3796
3797 Label done ;
3798
3799 if (EmitSync & 4) {
3800 cmp(SP, G0);
3801 return ;
3802 }
3803
3804 if (EmitSync & 8) {
3805 if (try_bias) {
3806 biased_locking_exit(mark_addr, Rscratch, done);
3807 }
3808
3809 // Test first if it is a fast recursive unlock
3810 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3811 br_null_short(Rmark, Assembler::pt, done);
3812
3813 // Check if it is still a lightweight lock; this is true if we see
3814 // the stack address of the basicLock in the markOop of the object
3815 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3816 casx_under_lock(mark_addr.base(), Rbox, Rmark,
3817 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3818 ba(done);
3819 delayed()->cmp(Rbox, Rmark);
3820 bind(done);
3821 return ;
3822 }
3823
3824 // Beware ... If the aggregate size of the code emitted by CLO and CUO
3825 // is too large, performance rolls abruptly off a cliff.
3826 // This could be related to inlining policies, code cache management, or
3827 // I$ effects.
3828 Label LStacked ;
3829
3830 if (try_bias) {
3831 // TODO: eliminate redundant LDs of obj->mark
3832 biased_locking_exit(mark_addr, Rscratch, done);
3833 }
3834
3835 ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
3836 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3837 andcc(Rscratch, Rscratch, G0);
3838 brx(Assembler::zero, false, Assembler::pn, done);
3839 delayed()->nop(); // consider: relocate fetch of mark, above, into this DS
3840 andcc(Rmark, 2, G0);
3841 brx(Assembler::zero, false, Assembler::pt, LStacked);
3842 delayed()->nop();
3843
3844 // It's inflated
3845 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
3846 // the ST of 0 into _owner which releases the lock. This prevents loads
3847 // and stores within the critical section from reordering (floating)
3848 // past the store that releases the lock. But TSO is a strong memory model
3849 // and that particular flavor of barrier is a noop, so we can safely elide it.
3850 // Note that we use 1-0 locking by default for the inflated case. We
3851 // close the resultant (and rare) race by having contending threads in
3852 // monitorenter periodically poll _owner.
3853 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3854 ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
3855 xor3(Rscratch, G2_thread, Rscratch);
3856 orcc(Rbox, Rscratch, Rbox);
3857 brx(Assembler::notZero, false, Assembler::pn, done);
3858 delayed()->
3859 ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3860 ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3861 orcc(Rbox, Rscratch, G0);
3862 if (EmitSync & 65536) {
3863 Label LSucc ;
3864 brx(Assembler::notZero, false, Assembler::pn, LSucc);
3865 delayed()->nop();
3866 ba(done);
3867 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3868
3869 bind(LSucc);
3870 st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3871 if (os::is_MP()) { membar (StoreLoad); }
3872 ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3873 andcc(Rscratch, Rscratch, G0);
3874 brx(Assembler::notZero, false, Assembler::pt, done);
3875 delayed()->andcc(G0, G0, G0);
3876 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3877 mov(G2_thread, Rscratch);
3878 casn(Rmark, G0, Rscratch);
3879 // invert icc.zf and goto done
3880 br_notnull(Rscratch, false, Assembler::pt, done);
3881 delayed()->cmp(G0, G0);
3882 ba(done);
3883 delayed()->cmp(G0, 1);
3884 } else {
3885 brx(Assembler::notZero, false, Assembler::pn, done);
3886 delayed()->nop();
3887 ba(done);
3888 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3889 }
3890
3891 bind (LStacked);
3892 // Consider: we could replace the expensive CAS in the exit
3893 // path with a simple ST of the displaced mark value fetched from
3894 // the on-stack basiclock box. That admits a race where a thread T2
3895 // in the slow lock path -- inflating with monitor M -- could race a
3896 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3897 // More precisely T1 in the stack-lock unlock path could "stomp" the
3898 // inflated mark value M installed by T2, resulting in an orphan
3899 // object monitor M and T2 becoming stranded. We can remedy that situation
3900 // by having T2 periodically poll the object's mark word using timed wait
3901 // operations. If T2 discovers that a stomp has occurred it vacates
3902 // the monitor M and wakes any other threads stranded on the now-orphan M.
3903 // In addition the monitor scavenger, which performs deflation,
3904 // would also need to check for orphan monitors and stranded threads.
3905 //
3906 // Finally, inflation is also used when T2 needs to assign a hashCode
3907 // to O and O is stack-locked by T1. The "stomp" race could cause
3908 // an assigned hashCode value to be lost. We can avoid that condition
3909 // and provide the necessary hashCode stability invariants by ensuring
3910 // that hashCode generation is idempotent between copying GCs.
3911 // For example we could compute the hashCode of an object O as
3912 // O's heap address XOR some high quality RNG value that is refreshed
3913 // at GC-time. The monitor scavenger would install the hashCode
3914 // found in any orphan monitors. Again, the mechanism admits a
3915 // lost-update "stomp" WAW race but detects and recovers as needed.
3916 //
3917 // A prototype implementation showed excellent results, although
3918 // the scavenger and timeout code was rather involved.
3919
3920 casn(mark_addr.base(), Rbox, Rscratch);
3921 cmp(Rbox, Rscratch);
3922 // Intentional fall through into done ...
3923
3924 bind(done);
3925 }
3926
3927
3928
3929 void MacroAssembler::print_CPU_state() {
3930 // %%%%% need to implement this
3931 }
3932
3933 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
3934 // %%%%% need to implement this
3935 }
3936
3937 void MacroAssembler::push_IU_state() {
3938 // %%%%% need to implement this
3939 }
3940
3941
3942 void MacroAssembler::pop_IU_state() {
3943 // %%%%% need to implement this
3944 }
3960
3961
3962 void MacroAssembler::pop_CPU_state() {
3963 // %%%%% need to implement this
3964 }
3965
3966
3967
3968 void MacroAssembler::verify_tlab() {
3969 #ifdef ASSERT
3970 if (UseTLAB && VerifyOops) {
3971 Label next, next2, ok;
3972 Register t1 = L0;
3973 Register t2 = L1;
3974 Register t3 = L2;
3975
3976 save_frame(0);
3977 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3978 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
3979 or3(t1, t2, t3);
3980 cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
3981 stop("assert(top >= start)");
3982 should_not_reach_here();
3983
3984 bind(next);
3985 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3986 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
3987 or3(t3, t2, t3);
3988 cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
3989 stop("assert(top <= end)");
3990 should_not_reach_here();
3991
3992 bind(next2);
3993 and3(t3, MinObjAlignmentInBytesMask, t3);
3994 cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
3995 stop("assert(aligned)");
3996 should_not_reach_here();
3997
3998 bind(ok);
3999 restore();
4000 }
4001 #endif
4002 }
4003
4004
4005 void MacroAssembler::eden_allocate(
4006 Register obj, // result: pointer to object after successful allocation
4007 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
4008 int con_size_in_bytes, // object size in bytes if known at compile time
4009 Register t1, // temp register
4010 Register t2, // temp register
4011 Label& slow_case // continuation point if fast allocation fails
4012 ){
4013 // make sure arguments make sense
4014 assert_different_registers(obj, var_size_in_bytes, t1, t2);
4015 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
4016 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
4017
4018 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
4019 // No allocation in the shared eden.
4020 ba_short(slow_case);
4021 } else {
4022 // get eden boundaries
4023 // note: we need both top & top_addr!
4024 const Register top_addr = t1;
4025 const Register end = t2;
4026
4027 CollectedHeap* ch = Universe::heap();
4028 set((intx)ch->top_addr(), top_addr);
4029 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
4030 ld_ptr(top_addr, delta, end);
4031 ld_ptr(top_addr, 0, obj);
4032
4033 // try to allocate
4034 Label retry;
4035 bind(retry);
4036 #ifdef ASSERT
4037 // make sure eden top is properly aligned
4038 {
4039 Label L;
4040 btst(MinObjAlignmentInBytesMask, obj);
4134 bind(L);
4135 }
4136 #endif // ASSERT
4137
4138 // update the tlab top pointer
4139 st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
4140 verify_tlab();
4141 }
4142
4143
void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = O0;
  Register t1 = G1;
  Register t2 = G3;
  Register t3 = O1;
  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    ba_short(slow_case);
  }

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);

  // calculate amount of free space
  sub(t1, top, t1);
  srl_ptr(t1, LogHeapWordSize, t1);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmp(t1, t2);
  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);

  // increment waste limit to prevent getting stuck on this slow path
  delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
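  // Heuristic sketch of the refill-waste limit (mirrors
  // ThreadLocalAllocBuffer): a TLAB with more free space than the limit is
  // kept and the requested object goes straight to eden; each such slow-path
  // allocation raises the limit, so a thread that lands here repeatedly
  // eventually discards and refills its TLAB instead:
  //
  //   if (free_words > refill_waste_limit) {
  //     refill_waste_limit += refill_waste_limit_increment;  // then try eden
  //   } else {
  //     discard_tlab();  // fill with a dummy int[] and grab a fresh TLAB
  //   }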
  if (TLABStats) {
    // increment number of slow_allocations
    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  ba_short(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage
    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
    add(t2, t1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  br_null_short(top, Assembler::pn, do_refill);
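  // The discarded TLAB is overwritten with a dummy int[] covering
  // [top, end + alignment_reserve) so the heap stays parseable: concurrent
  // GC and heap iteration can walk the dead space object by object.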

  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
  // calculate the length of the filler int[] (in jints)
  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
  st(t1, top, arrayOopDesc::length_offset_in_bytes());
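  // Worked example of the length computation above, assuming a 64-bit VM
  // (HeapWordSize == 8, sizeof(jint) == 4, so the shift is by 1):
  //   length = (free_words - header_words + alignment_reserve_words) * 2
  // which makes the int[] end exactly at end + alignment_reserve.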
  // set klass to intArrayKlass
  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
  ld_ptr(t2, 0, t2);
  // store klass last: concurrent GCs assume the length is valid as soon as
  // the klass field is non-null
  store_klass(t2, top);
  verify_oop(top);

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
  sub(top, t1, t1); // size of tlab's allocated portion
  incr_allocated_bytes(t1, t2, t3);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
  sll_ptr(t1, LogHeapWordSize, t1);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
#ifdef ASSERT
  // check that tlab_size (t1) is still valid
  {
    Label ok;
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
    sll_ptr(t2, LogHeapWordSize, t2);
    cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
    stop("assert(t1 == tlab_size)");
    should_not_reach_here();

    bind(ok);
  }
#endif // ASSERT
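  // Publish the new TLAB: top was returned by eden_allocate, t1 is the TLAB
  // size in bytes. The end pointer is pulled in by alignment_reserve so the
  // reserved tail is always available for the dummy filler object when this
  // TLAB is eventually retired (see discard_tlab above).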
  add(top, t1, top); // t1 is tlab_size
  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab();
  ba_short(retry);
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                                          Register t1, Register t2) {
  // Bump total bytes allocated by this thread
  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
  assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
  // v8 support has gone the way of the dodo
  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
  add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
}

Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::never:     return Assembler::always;
    case Assembler::zero:      return Assembler::notZero;
    case Assembler::lessEqual: return Assembler::greater;
    case Assembler::less:      return Assembler::greaterEqual;
  Label refill, restart;
  if (with_frame) {
    __ save_frame(0);
    pre_val = I0;  // Was O0 before the save.
  } else {
    pre_val = O0;
  }
  int satb_q_index_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_index());
  int satb_q_buf_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_buf());
  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
         "check sizes in assembly below");

  __ bind(restart);
  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);

  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
  // If the branch is taken, no harm in executing this in the delay slot.
  __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  __ sub(L0, oopSize, L0);

  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
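  // Queue layout sketch: _index counts down in bytes from the buffer
  // capacity to 0. An enqueue decrements _index by oopSize and stores the
  // old value at _buf + _index; _index == 0 (tested above) means the buffer
  // is full and must be handed to the runtime for processing.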
  if (!with_frame) {
    // Use return-from-leaf.
    __ retl();
    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  } else {
    // Not delayed.
    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
    __ ret();
    __ delayed()->restore();
  }
  __ bind(refill);

  address handle_zero =
    assert(pre_val == noreg, "check this code");
  }

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    ld(G2_thread,
       in_bytes(JavaThread::satb_mark_queue_offset() +
                PtrQueue::byte_offset_of_active()),
       tmp);
  } else {
    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
              "Assumption");
    ldsb(G2_thread,
         in_bytes(JavaThread::satb_mark_queue_offset() +
                  PtrQueue::byte_offset_of_active()),
         tmp);
  }
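  // The per-thread "active" flag is non-zero only while concurrent marking
  // is in progress; its width differs between builds, hence the two load
  // sizes above.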

  // Marking not active => skip the barrier.
  // Check on whether to annul.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
  delayed()->nop();

  // Do we need to load the previous value?
  if (obj != noreg) {
    // Load the previous value...
    if (index == noreg) {
      if (Assembler::is_simm13(offset)) {
        load_heap_oop(obj, offset, tmp);
      } else {
        set(offset, tmp);
        load_heap_oop(obj, tmp, tmp);
      }
    } else {
      load_heap_oop(obj, index, tmp);
    }
    // Previous value has been loaded into tmp.
    pre_val = tmp;
  }

  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null? If so, there is nothing to record.
  // Check on whether to annul.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
  delayed()->nop();

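  // Snapshot-at-the-beginning invariant: the value the field held when
  // marking started must not be lost, so the previous value is recorded in
  // the thread's SATB queue before the store overwrites it.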
  // OK, it's not filtered, so we'll need to call enqueue. In the normal
  // case, pre_val will be a scratch G-reg, but there are some cases in
  // which it's an O-reg. In the former case, do a normal call; in the
  // latter, save a frame here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");

  if (pre_val->is_global() && !preserve_o_regs) {
    generate_satb_log_enqueue_if_necessary(true); // with frame

    call(satb_log_enqueue_with_frame);
    delayed()->mov(pre_val, O0);
  } else {
    generate_satb_log_enqueue_if_necessary(false); // frameless

    save_frame(0);
    call(satb_log_enqueue_frameless);
    delayed()->mov(pre_val->after_save(), O0);
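    // after_save() renames the value for the new register window: an O
    // register of the caller is visible as the matching I register after
    // save_frame, so this still reads the original pre_val.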