7063628_1 Cdiff: src/cpu/sparc/vm/assembler_sparc.cpp


*** 104,114 **** case fbp_op2: s = "fbp"; break; case br_op2: s = "br"; break; case bp_op2: s = "bp"; break; case cb_op2: s = "cb"; break; case bpr_op2: { ! if (is_cbc(inst)) { s = is_cxb(inst) ? "cxb" : "cwb"; } else { s = "bpr"; } break; --- 104,114 ---- case fbp_op2: s = "fbp"; break; case br_op2: s = "br"; break; case bp_op2: s = "bp"; break; case cb_op2: s = "cb"; break; case bpr_op2: { ! if (is_cbcond(inst)) { s = is_cxb(inst) ? "cxb" : "cwb"; } else { s = "bpr"; } break;
*** 138,148 **** case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case bpr_op2: { ! if (is_cbc(inst)) { m = wdisp10(word_aligned_ones, 0); v = wdisp10(dest_pos, inst_pos); } else { m = wdisp16(word_aligned_ones, 0); v = wdisp16(dest_pos, inst_pos); --- 138,148 ---- case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case bpr_op2: { ! if (is_cbcond(inst)) { m = wdisp10(word_aligned_ones, 0); v = wdisp10(dest_pos, inst_pos); } else { m = wdisp16(word_aligned_ones, 0); v = wdisp16(dest_pos, inst_pos);
*** 169,179 **** case bp_op2: r = inv_wdisp( inst, pos, 19); break; case fb_op2: r = inv_wdisp( inst, pos, 22); break; case br_op2: r = inv_wdisp( inst, pos, 22); break; case cb_op2: r = inv_wdisp( inst, pos, 22); break; case bpr_op2: { ! if (is_cbc(inst)) { r = inv_wdisp10(inst, pos); } else { r = inv_wdisp16(inst, pos); } break; --- 169,179 ---- case bp_op2: r = inv_wdisp( inst, pos, 19); break; case fb_op2: r = inv_wdisp( inst, pos, 22); break; case br_op2: r = inv_wdisp( inst, pos, 22); break; case cb_op2: r = inv_wdisp( inst, pos, 22); break; case bpr_op2: { ! if (is_cbcond(inst)) { r = inv_wdisp10(inst, pos); } else { r = inv_wdisp16(inst, pos); } break;
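
The three hunks above rename is_cbc() to is_cbcond() in the branch decode/patch helpers: when an instruction carrying bpr_op2 is actually a compare-and-branch (cbcond), its target is packed and unpacked with wdisp10/inv_wdisp10, a 10-bit word displacement, instead of the 16-bit bpr form. The helper below is not part of this patch and its name is invented; it is only a sketch of the reach such a displacement gives cbcond.

    #include <cassert>
    #include <cstdint>

    // Illustrative only, not code from this changeset.  cbcond encodes its
    // target as a signed 10-bit word displacement (what wdisp10/inv_wdisp10
    // pack and unpack above), so it can only reach labels within roughly
    // +/-512 instruction words of the branch itself.
    static bool target_fits_wdisp10(intptr_t branch_pc, intptr_t target_pc) {
      intptr_t byte_disp = target_pc - branch_pc;
      assert((byte_disp & 3) == 0 && "SPARC branch targets are word aligned");
      intptr_t word_disp = byte_disp >> 2;               // displacement in words
      return word_disp >= -(1 << 9) && word_disp <= (1 << 9) - 1;
    }
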
*** 989,999 **** #ifdef ASSERT // Verify that flags was zeroed on return to Java Label PcOk; save_frame(0); // to avoid clobbering O0 ld_ptr(pc_addr, L0); ! br_null(L0, false, Assembler::pt, PcOk); stop("last_Java_pc not zeroed before leaving Java"); bind(PcOk); // Verify that flags was zeroed on return to Java Label FlagsOk; --- 989,999 ---- #ifdef ASSERT // Verify that flags was zeroed on return to Java Label PcOk; save_frame(0); // to avoid clobbering O0 ld_ptr(pc_addr, L0); ! br_null_short(L0, Assembler::pt, PcOk); stop("last_Java_pc not zeroed before leaving Java"); bind(PcOk); // Verify that flags was zeroed on return to Java Label FlagsOk;
*** 1114,1124 **** check_and_handle_popframe(scratch_reg); check_and_handle_earlyret(scratch_reg); Address exception_addr(G2_thread, Thread::pending_exception_offset()); ld_ptr(exception_addr, scratch_reg); ! br_null(scratch_reg,false,pt,L); // we use O7 linkage so that forward_exception_entry has the issuing PC call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); delayed()->nop(); bind(L); } --- 1114,1124 ---- check_and_handle_popframe(scratch_reg); check_and_handle_earlyret(scratch_reg); Address exception_addr(G2_thread, Thread::pending_exception_offset()); ld_ptr(exception_addr, scratch_reg); ! br_null_short(scratch_reg, pt, L); // we use O7 linkage so that forward_exception_entry has the issuing PC call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); delayed()->nop(); bind(L); }
*** 1888,1902 **** set(Universe::verify_oop_mask (), O2_mask); set(Universe::verify_oop_bits (), O3_bits); // assert((obj & oop_mask) == oop_bits); and3(O0_obj, O2_mask, O4_temp); ! cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, null_or_fail); if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { // the null_or_fail case is useless; must test for null separately ! br_null(O0_obj, false, pn, succeed); } // Check the klassOop of this object for being in the right area of memory. // Cannot do the load in the delay above slot in case O0 is null load_klass(O0_obj, O0_obj); --- 1888,1902 ---- set(Universe::verify_oop_mask (), O2_mask); set(Universe::verify_oop_bits (), O3_bits); // assert((obj & oop_mask) == oop_bits); and3(O0_obj, O2_mask, O4_temp); ! cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { // the null_or_fail case is useless; must test for null separately ! br_null_short(O0_obj, pn, succeed); } // Check the klassOop of this object for being in the right area of memory. // Cannot do the load in the delay above slot in case O0 is null load_klass(O0_obj, O0_obj);
*** 1904,1914 **** if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) set(Universe::verify_klass_mask(), O2_mask); if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) set(Universe::verify_klass_bits(), O3_bits); and3(O0_obj, O2_mask, O4_temp); ! cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, fail); // Check the klass's klass load_klass(O0_obj, O0_obj); and3(O0_obj, O2_mask, O4_temp); cmp(O4_temp, O3_bits); brx(notEqual, false, pn, fail); --- 1904,1914 ---- if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) set(Universe::verify_klass_mask(), O2_mask); if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) set(Universe::verify_klass_bits(), O3_bits); and3(O0_obj, O2_mask, O4_temp); ! cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail); // Check the klass's klass load_klass(O0_obj, O0_obj); and3(O0_obj, O2_mask, O4_temp); cmp(O4_temp, O3_bits); brx(notEqual, false, pn, fail);
*** 2132,2150 **** ShouldNotReachHere(); return Assembler::rc_z; } // compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS ! void MacroAssembler::br_zero(Register s1, Label& L) { ! assert_not_delayed(); ! if (use_cbc(L)) { ! Assembler::cbc(zero, icc, s1, 0, L); ! } else { tst(s1); ! br (zero, false, pt, L); ! delayed()->nop(); ! } } // Compares a pointer register with zero and branches on null. // Does a test & branch on 32-bit systems and a register-branch on 64-bit. void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) { --- 2132,2144 ---- ShouldNotReachHere(); return Assembler::rc_z; } // compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS ! void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { tst(s1); ! br (c, a, p, L); } // Compares a pointer register with zero and branches on null. // Does a test & branch on 32-bit systems and a register-branch on 64-bit. void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
*** 2226,2272 **** br(c, a, p, L); delayed()->nop(); } } ! void MacroAssembler::cmp_and_br(Register s1, int simm13a, Condition c, ! bool a, Predict p, Label& L) { assert_not_delayed(); ! if (is_simm(simm13a,5) && use_cbc(L)) { ! Assembler::cbc(c, icc, s1, simm13a, L); } else { cmp(s1, simm13a); ! br(c, a, p, L); delayed()->nop(); } } // Branch that tests xcc in LP64 and icc in !LP64 ! void MacroAssembler::cmp_and_brx(Register s1, Register s2, Condition c, ! bool a, Predict p, Label& L) { assert_not_delayed(); ! if (use_cbc(L)) { ! Assembler::cbc(c, ptr_cc, s1, s2, L); } else { cmp(s1, s2); ! brx(c, a, p, L); delayed()->nop(); } } ! void MacroAssembler::cmp_and_brx(Register s1, int simm13a, Condition c, ! bool a, Predict p, Label& L) { assert_not_delayed(); ! if (is_simm(simm13a,5) && use_cbc(L)) { ! Assembler::cbc(c, ptr_cc, s1, simm13a, L); } else { cmp(s1, simm13a); ! brx(c, a, p, L); delayed()->nop(); } } // instruction sequences factored across compiler & interpreter void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, Register Rb_hi, Register Rb_low, --- 2220,2313 ---- br(c, a, p, L); delayed()->nop(); } } ! // Compare integer (32 bit) values (icc only). ! void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c, ! Predict p, Label& L) { assert_not_delayed(); ! if (use_cbcond(L)) { ! Assembler::cbcond(c, icc, s1, s2, L); } else { + cmp(s1, s2); + br(c, false, p, L); + delayed()->nop(); + } + } + + // Compare integer (32 bit) values (icc only). + void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (is_simm(simm13a,5) && use_cbcond(L)) { + Assembler::cbcond(c, icc, s1, simm13a, L); + } else { cmp(s1, simm13a); ! br(c, false, p, L); delayed()->nop(); } } // Branch that tests xcc in LP64 and icc in !LP64 ! void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c, ! Predict p, Label& L) { assert_not_delayed(); ! if (use_cbcond(L)) { ! Assembler::cbcond(c, ptr_cc, s1, s2, L); } else { cmp(s1, s2); ! brx(c, false, p, L); delayed()->nop(); } } ! // Branch that tests xcc in LP64 and icc in !LP64 ! void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c, ! Predict p, Label& L) { assert_not_delayed(); ! if (is_simm(simm13a,5) && use_cbcond(L)) { ! Assembler::cbcond(c, ptr_cc, s1, simm13a, L); } else { cmp(s1, simm13a); ! brx(c, false, p, L); delayed()->nop(); } } + // Short branch version for compares a pointer with zero. + + void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(zero, ptr_cc, s1, 0, L); + return; + } + br_null(s1, false, p, L); + delayed()->nop(); + } + + void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(notZero, ptr_cc, s1, 0, L); + return; + } + br_notnull(s1, false, p, L); + delayed()->nop(); + } + + // Unconditional short branch + void MacroAssembler::ba_short(Label& L) { + if (use_cbcond(L)) { + Assembler::cbcond(equal, icc, G0, G0, L); + return; + } + br(always, false, pt, L); + delayed()->nop(); + } + // instruction sequences factored across compiler & interpreter void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, Register Rb_hi, Register Rb_low,
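
This hunk is the core of the change: the old cmp_and_br/cmp_and_brx helpers (which took an annul flag) are replaced by a *_short family plus br_null_short, br_notnull_short and ba_short. Each helper emits a single cbcond compare-and-branch when use_cbcond(L) allows it, and for the immediate forms only when the constant also fits cbcond's signed 5-bit field, otherwise it falls back to the classic cmp; br; delayed()->nop() sequence, so callers no longer own a delay slot after these calls. The snippet below is a standalone model of that decision, not HotSpot code; the exact criteria inside use_cbcond() (CPU support, label reachability) are not shown in this hunk and are assumed here.

    #include <cstdint>

    // Simplified, assumed model of the dispatch performed by the new
    // cmp_and_br_short / cmp_and_brx_short helpers above.
    enum class BranchShape { Cbcond, CmpBranchNop };

    BranchShape choose_branch_shape(bool cpu_has_cbcond,          // assumed use_cbcond() input
                                    bool target_in_wdisp10_range, // assumed use_cbcond() input
                                    int32_t simm13a) {
      bool imm_fits_simm5 = simm13a >= -16 && simm13a <= 15;      // mirrors is_simm(simm13a, 5)
      if (cpu_has_cbcond && target_in_wdisp10_range && imm_fits_simm5) {
        return BranchShape::Cbcond;      // one instruction, no delay slot
      }
      return BranchShape::CmpBranchNop;  // cmp(); br(); delayed()->nop();
    }
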
*** 2288,2298 **** // And the equals case for the high part does not need testing, // since that triplet is reached only after finding the high halves differ. if (VM_Version::v9_instructions_work()) { mov(-1, Rresult); ! ba(done, false); delayed()-> movcc(greater, false, icc, 1, Rresult); } else { br(less, true, pt, done); delayed()-> set(-1, Rresult); br(greater, true, pt, done); delayed()-> set( 1, Rresult); } --- 2329,2339 ---- // And the equals case for the high part does not need testing, // since that triplet is reached only after finding the high halves differ. if (VM_Version::v9_instructions_work()) { mov(-1, Rresult); ! ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); } else { br(less, true, pt, done); delayed()-> set(-1, Rresult); br(greater, true, pt, done); delayed()-> set( 1, Rresult); }
*** 2363,2373 **** sll(Rin_high, Rcount, Rout_high); if (Rcount == Rout_low) { sll(Rin_low, Rcount, Rout_low); // low half } srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more ! ba(done, false); delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); sll(Rin_low, Ralt_count, Rout_high ); --- 2404,2414 ---- sll(Rin_high, Rcount, Rout_high); if (Rcount == Rout_low) { sll(Rin_low, Rcount, Rout_low); // low half } srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more ! ba(done); delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); sll(Rin_low, Ralt_count, Rout_high );
*** 2424,2434 **** sra(Rin_high, Rcount, Rout_high ); // high half sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { srl(Rin_low, Rcount, Rout_low); } ! ba(done, false); delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); --- 2465,2475 ---- sra(Rin_high, Rcount, Rout_high ); // high half sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { srl(Rin_low, Rcount, Rout_low); } ! ba(done); delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift);
*** 2487,2497 **** srl(Rin_high, Rcount, Rout_high ); // high half sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { srl(Rin_low, Rcount, Rout_low); } ! ba(done, false); delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); --- 2528,2538 ---- srl(Rin_high, Rcount, Rout_high ); // high half sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { srl(Rin_low, Rcount, Rout_low); } ! ba(done); delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift);
*** 2751,2761 **** mov(G0,yield_reg); mov(G0, yieldall_reg); set(StubRoutines::Sparc::locked, lock_reg); bind(retry_get_lock); ! cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dont_yield); if(use_call_vm) { Untested("Need to verify global reg consistancy"); call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); } else { --- 2792,2802 ---- mov(G0,yield_reg); mov(G0, yieldall_reg); set(StubRoutines::Sparc::locked, lock_reg); bind(retry_get_lock); ! cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); if(use_call_vm) { Untested("Need to verify global reg consistancy"); call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); } else {
*** 2781,2791 **** br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); delayed()->add(yield_reg,1,yield_reg); // yes, got lock. do we have the same top? ld(top_ptr_reg_after_save, 0, value_reg); ! cmp_and_br(value_reg, top_reg_after_save, Assembler::notEqual, false, Assembler::pt, not_same); // yes, same top. st(ptr_reg_after_save, top_ptr_reg_after_save, 0); membar(Assembler::StoreStore); --- 2822,2832 ---- br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); delayed()->add(yield_reg,1,yield_reg); // yes, got lock. do we have the same top? ld(top_ptr_reg_after_save, 0, value_reg); ! cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); // yes, same top. st(ptr_reg_after_save, top_ptr_reg_after_save, 0); membar(Assembler::StoreStore);
*** 3031,3041 **** L2, L3, L4, L5, NULL, &L_pop_to_failure); // on success: restore(); ! ba(L_success); // on failure: bind(L_pop_to_failure); restore(); bind(L_failure); --- 3072,3082 ---- L2, L3, L4, L5, NULL, &L_pop_to_failure); // on success: restore(); ! ba_short(L_success); // on failure: bind(L_pop_to_failure); restore(); bind(L_failure);
*** 3110,3121 **** // Otherwise, it's the slow path for us (no success at this point). // Hacked ba(), which may only be used just before L_fallthrough. #define FINAL_JUMP(label) \ if (&(label) != &L_fallthrough) { \ ! ba(label, false); \ ! delayed()->nop(); \ } if (super_check_offset.is_register()) { brx(Assembler::equal, false, Assembler::pn, *L_success); delayed()->cmp(super_check_offset.as_register(), sc_offset); --- 3151,3161 ---- // Otherwise, it's the slow path for us (no success at this point). // Hacked ba(), which may only be used just before L_fallthrough. #define FINAL_JUMP(label) \ if (&(label) != &L_fallthrough) { \ ! ba(label); delayed()->nop(); \ } if (super_check_offset.is_register()) { brx(Assembler::equal, false, Assembler::pn, *L_success); delayed()->cmp(super_check_offset.as_register(), sc_offset);
*** 3243,3253 **** // Success. Cache the super we found and proceed in triumph. st_ptr(super_klass, sub_klass, sc_offset); if (L_success != &L_fallthrough) { ! ba(*L_success, false); delayed()->nop(); } bind(L_fallthrough); } --- 3283,3293 ---- // Success. Cache the super we found and proceed in triumph. st_ptr(super_klass, sub_klass, sc_offset); if (L_success != &L_fallthrough) { ! ba(*L_success); delayed()->nop(); } bind(L_fallthrough); }
*** 3258,3268 **** Label& wrong_method_type) { assert_different_registers(mtype_reg, mh_reg, temp_reg); // compare method type against that of the receiver RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg); load_heap_oop(mh_reg, mhtype_offset, temp_reg); ! cmp_and_brx(temp_reg, mtype_reg, Assembler::notEqual, false, Assembler::pn, wrong_method_type); } // A method handle has a "vmslots" field which gives the size of its // argument list in JVM stack slots. This field is either located directly --- 3298,3308 ---- Label& wrong_method_type) { assert_different_registers(mtype_reg, mh_reg, temp_reg); // compare method type against that of the receiver RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg); load_heap_oop(mh_reg, mhtype_offset, temp_reg); ! cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type); } // A method handle has a "vmslots" field which gives the size of its // argument list in JVM stack slots. This field is either located directly
*** 3351,3361 **** // whether the epoch is still valid // Note that the runtime guarantees sufficient alignment of JavaThread // pointers to allow age to be placed into low bits assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); ! cmp_and_brx(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, false, Assembler::pn, cas_label); load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); or3(G2_thread, temp_reg, temp_reg); xor3(mark_reg, temp_reg, temp_reg); --- 3391,3401 ---- // whether the epoch is still valid // Note that the runtime guarantees sufficient alignment of JavaThread // pointers to allow age to be placed into low bits assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); ! cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); or3(G2_thread, temp_reg, temp_reg); xor3(mark_reg, temp_reg, temp_reg);
*** 3418,3428 **** } if (slow_case != NULL) { brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } ! ba(done); bind(try_rebias); // At this point we know the epoch has expired, meaning that the // current "bias owner", if any, is actually invalid. Under these // circumstances _only_, we are allowed to use the current header's --- 3458,3468 ---- } if (slow_case != NULL) { brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } ! ba_short(done); bind(try_rebias); // At this point we know the epoch has expired, meaning that the // current "bias owner", if any, is actually invalid. Under these // circumstances _only_, we are allowed to use the current header's
*** 3446,3456 **** } if (slow_case != NULL) { brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } ! ba(done); bind(try_revoke_bias); // The prototype mark in the klass doesn't have the bias bit set any // more, indicating that objects of this data type are not supposed // to be biased any more. We are going to try to reset the mark of --- 3486,3496 ---- } if (slow_case != NULL) { brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } ! ba_short(done); bind(try_revoke_bias); // The prototype mark in the klass doesn't have the bias bit set any // more, indicating that objects of this data type are not supposed // to be biased any more. We are going to try to reset the mark of
*** 3497,3507 **** // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by // Solaris/SPARC's "as". Another apt name would be cas_ptr() void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { ! casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ; } // compiler_lock_object() and compiler_unlock_object() are direct transliterations --- 3537,3547 ---- // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by // Solaris/SPARC's "as". Another apt name would be cas_ptr() void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { ! casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); } // compiler_lock_object() and compiler_unlock_object() are direct transliterations
*** 3538,3550 **** if (counters != NULL) { inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); } if (EmitSync & 1) { ! mov (3, Rscratch) ; ! st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! cmp (SP, G0) ; return ; } if (EmitSync & 2) { --- 3578,3590 ---- if (counters != NULL) { inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); } if (EmitSync & 1) { ! mov(3, Rscratch); ! st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! cmp(SP, G0); return ; } if (EmitSync & 2) {
*** 3581,3600 **** // we did not find an unlocked object so see if this is a recursive case // sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); andcc(Rscratch, 0xfffff003, Rscratch); st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! bind (done) ; return ; } Label Egress ; if (EmitSync & 256) { Label IsInflated ; ! ld_ptr (mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. --- 3621,3640 ---- // we did not find an unlocked object so see if this is a recursive case // sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); andcc(Rscratch, 0xfffff003, Rscratch); st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! bind (done); return ; } Label Egress ; if (EmitSync & 256) { Label IsInflated ; ! ld_ptr(mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified.
*** 3601,3698 **** } // Store mark into displaced mark field in the on-stack basic-lock "box" // Critically, this must happen before the CAS // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. ! st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! andcc (Rmark, 2, G0) ; ! brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; ! delayed() -> // Try stack-lock acquisition. // Beware: the 1st instruction is in a delay slot ! mov (Rbox, Rscratch); ! or3 (Rmark, markOopDesc::unlocked_value, Rmark); ! assert (mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn (mark_addr.base(), Rmark, Rscratch) ; ! cmp (Rmark, Rscratch); ! brx (Assembler::equal, false, Assembler::pt, done); delayed()->sub(Rscratch, SP, Rscratch); // Stack-lock attempt failed - check for recursive stack-lock. // See the comments below about how we might remove this case. #ifdef _LP64 ! sub (Rscratch, STACK_BIAS, Rscratch); #endif assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); ! andcc (Rscratch, 0xfffff003, Rscratch); ! br (Assembler::always, false, Assembler::pt, done) ; ! delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! bind (IsInflated) ; if (EmitSync & 64) { // If m->owner != null goto IsLocked // Pessimistic form: Test-and-CAS vs CAS // The optimistic form avoids RTS->RTO cache line upgrades. ! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! andcc (Rscratch, Rscratch, G0) ; ! brx (Assembler::notZero, false, Assembler::pn, done) ; ! delayed()->nop() ; // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. ! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; ! mov (G2_thread, Rscratch) ; ! casn (Rmark, G0, Rscratch) ; ! cmp (Rscratch, G0) ; // Intentional fall-through into done } else { // Aggressively avoid the Store-before-CAS penalty // Defer the store into box->dhw until after the CAS Label IsInflated, Recursive ; // Anticipate CAS -- Avoid RTS->RTO upgrade ! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; ! ld_ptr (mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. } ! andcc (Rmark, 2, G0) ; ! brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; delayed()-> // Beware - dangling delay-slot // Try stack-lock acquisition. // Transiently install BUSY (0) encoding in the mark word. // if the CAS of 0 into the mark was successful then we execute: // ST box->dhw = mark -- save fetched mark in on-stack basiclock box // ST obj->mark = box -- overwrite transient 0 value // This presumes TSO, of course. ! mov (0, Rscratch) ; ! or3 (Rmark, markOopDesc::unlocked_value, Rmark); ! assert (mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn (mark_addr.base(), Rmark, Rscratch) ; ! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; ! cmp (Rscratch, Rmark) ; ! brx (Assembler::notZero, false, Assembler::pn, Recursive) ; ! delayed() -> ! st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); if (counters != NULL) { cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); } ! br (Assembler::always, false, Assembler::pt, done); ! delayed() -> ! st_ptr (Rbox, mark_addr) ; ! bind (Recursive) ; // Stack-lock attempt failed - check for recursive stack-lock. // Tests show that we can remove the recursive case with no impact // on refworkload 0.83. If we need to reduce the size of the code // emitted by compiler_lock_object() the recursive case is perfect // candidate. --- 3641,3736 ---- } // Store mark into displaced mark field in the on-stack basic-lock "box" // Critically, this must happen before the CAS // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. ! st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! andcc(Rmark, 2, G0); ! brx(Assembler::notZero, false, Assembler::pn, IsInflated); ! delayed()-> // Try stack-lock acquisition. // Beware: the 1st instruction is in a delay slot ! mov(Rbox, Rscratch); ! or3(Rmark, markOopDesc::unlocked_value, Rmark); ! assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn(mark_addr.base(), Rmark, Rscratch); ! cmp(Rmark, Rscratch); ! brx(Assembler::equal, false, Assembler::pt, done); delayed()->sub(Rscratch, SP, Rscratch); // Stack-lock attempt failed - check for recursive stack-lock. // See the comments below about how we might remove this case. #ifdef _LP64 ! sub(Rscratch, STACK_BIAS, Rscratch); #endif assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); ! andcc(Rscratch, 0xfffff003, Rscratch); ! br(Assembler::always, false, Assembler::pt, done); ! delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); ! bind(IsInflated); if (EmitSync & 64) { // If m->owner != null goto IsLocked // Pessimistic form: Test-and-CAS vs CAS // The optimistic form avoids RTS->RTO cache line upgrades. ! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! andcc(Rscratch, Rscratch, G0); ! brx(Assembler::notZero, false, Assembler::pn, done); ! delayed()->nop(); // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. ! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); ! mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); ! cmp(Rscratch, G0); // Intentional fall-through into done } else { // Aggressively avoid the Store-before-CAS penalty // Defer the store into box->dhw until after the CAS Label IsInflated, Recursive ; // Anticipate CAS -- Avoid RTS->RTO upgrade ! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); ! ld_ptr(mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. } ! andcc(Rmark, 2, G0); ! brx(Assembler::notZero, false, Assembler::pn, IsInflated); delayed()-> // Beware - dangling delay-slot // Try stack-lock acquisition. // Transiently install BUSY (0) encoding in the mark word. // if the CAS of 0 into the mark was successful then we execute: // ST box->dhw = mark -- save fetched mark in on-stack basiclock box // ST obj->mark = box -- overwrite transient 0 value // This presumes TSO, of course. ! mov(0, Rscratch); ! or3(Rmark, markOopDesc::unlocked_value, Rmark); ! assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn(mark_addr.base(), Rmark, Rscratch); ! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); ! cmp(Rscratch, Rmark); ! brx(Assembler::notZero, false, Assembler::pn, Recursive); ! delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); if (counters != NULL) { cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); } ! ba(done); ! delayed()->st_ptr(Rbox, mark_addr); ! bind(Recursive); // Stack-lock attempt failed - check for recursive stack-lock. // Tests show that we can remove the recursive case with no impact // on refworkload 0.83. If we need to reduce the size of the code // emitted by compiler_lock_object() the recursive case is perfect // candidate.
*** 3705,3757 **** // the fast-path stack-lock code from the interpreter and always passed // control to the "slow" operators in synchronizer.cpp. // RScratch contains the fetched obj->mark value from the failed CASN. #ifdef _LP64 ! sub (Rscratch, STACK_BIAS, Rscratch); #endif sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); ! andcc (Rscratch, 0xfffff003, Rscratch); if (counters != NULL) { // Accounting needs the Rscratch register ! st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); ! br (Assembler::always, false, Assembler::pt, done) ; ! delayed()->nop() ; } else { ! br (Assembler::always, false, Assembler::pt, done) ; ! delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); } ! bind (IsInflated) ; if (EmitSync & 64) { // If m->owner != null goto IsLocked // Test-and-CAS vs CAS // Pessimistic form avoids futile (doomed) CAS attempts // The optimistic form avoids RTS->RTO cache line upgrades. ! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! andcc (Rscratch, Rscratch, G0) ; ! brx (Assembler::notZero, false, Assembler::pn, done) ; ! delayed()->nop() ; // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. ! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; ! mov (G2_thread, Rscratch) ; ! casn (Rmark, G0, Rscratch) ; ! cmp (Rscratch, G0) ; // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. ! st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); // Intentional fall-through into done } ! bind (done) ; } void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch, bool try_bias) { --- 3743,3794 ---- // the fast-path stack-lock code from the interpreter and always passed // control to the "slow" operators in synchronizer.cpp. // RScratch contains the fetched obj->mark value from the failed CASN. #ifdef _LP64 ! sub(Rscratch, STACK_BIAS, Rscratch); #endif sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); ! andcc(Rscratch, 0xfffff003, Rscratch); if (counters != NULL) { // Accounting needs the Rscratch register ! st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); ! ba_short(done); } else { ! ba(done); ! delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); } ! bind (IsInflated); if (EmitSync & 64) { // If m->owner != null goto IsLocked // Test-and-CAS vs CAS // Pessimistic form avoids futile (doomed) CAS attempts // The optimistic form avoids RTS->RTO cache line upgrades. ! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! andcc(Rscratch, Rscratch, G0); ! brx(Assembler::notZero, false, Assembler::pn, done); ! delayed()->nop(); // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. ! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); ! mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); ! cmp(Rscratch, G0); // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. ! st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); // Intentional fall-through into done } ! bind (done); } void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch, bool try_bias) {
*** 3758,3768 **** Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); Label done ; if (EmitSync & 4) { ! cmp (SP, G0) ; return ; } if (EmitSync & 8) { if (try_bias) { --- 3795,3805 ---- Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); Label done ; if (EmitSync & 4) { ! cmp(SP, G0); return ; } if (EmitSync & 8) { if (try_bias) {
*** 3769,3788 **** biased_locking_exit(mark_addr, Rscratch, done); } // Test first if it is a fast recursive unlock ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); ! br_null(Rmark, false, Assembler::pt, done); // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object assert(mark_addr.disp() == 0, "cas must take a zero displacement"); casx_under_lock(mark_addr.base(), Rbox, Rmark, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); ! ba(done, false); delayed()->cmp(Rbox, Rmark); ! bind (done) ; return ; } // Beware ... If the aggregate size of the code emitted by CLO and CUO is // is too large performance rolls abruptly off a cliff. --- 3806,3825 ---- biased_locking_exit(mark_addr, Rscratch, done); } // Test first if it is a fast recursive unlock ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); ! br_null_short(Rmark, Assembler::pt, done); // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object assert(mark_addr.disp() == 0, "cas must take a zero displacement"); casx_under_lock(mark_addr.base(), Rbox, Rmark, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); ! ba(done); delayed()->cmp(Rbox, Rmark); ! bind(done); return ; } // Beware ... If the aggregate size of the code emitted by CLO and CUO is // is too large performance rolls abruptly off a cliff.
*** 3793,3810 **** if (try_bias) { // TODO: eliminate redundant LDs of obj->mark biased_locking_exit(mark_addr, Rscratch, done); } ! ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ; ! ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); ! andcc (Rscratch, Rscratch, G0); ! brx (Assembler::zero, false, Assembler::pn, done); ! delayed()-> nop() ; // consider: relocate fetch of mark, above, into this DS ! andcc (Rmark, 2, G0) ; ! brx (Assembler::zero, false, Assembler::pt, LStacked) ; ! delayed()-> nop() ; // It's inflated // Conceptually we need a #loadstore|#storestore "release" MEMBAR before // the ST of 0 into _owner which releases the lock. This prevents loads // and stores within the critical section from reordering (floating) --- 3830,3847 ---- if (try_bias) { // TODO: eliminate redundant LDs of obj->mark biased_locking_exit(mark_addr, Rscratch, done); } ! ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); ! ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); ! andcc(Rscratch, Rscratch, G0); ! brx(Assembler::zero, false, Assembler::pn, done); ! delayed()->nop(); // consider: relocate fetch of mark, above, into this DS ! andcc(Rmark, 2, G0); ! brx(Assembler::zero, false, Assembler::pt, LStacked); ! delayed()->nop(); // It's inflated // Conceptually we need a #loadstore|#storestore "release" MEMBAR before // the ST of 0 into _owner which releases the lock. This prevents loads // and stores within the critical section from reordering (floating)
*** 3811,3861 **** // past the store that releases the lock. But TSO is a strong memory model // and that particular flavor of barrier is a noop, so we can safely elide it. // Note that we use 1-0 locking by default for the inflated case. We // close the resultant (and rare) race by having contented threads in // monitorenter periodically poll _owner. ! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); ! xor3 (Rscratch, G2_thread, Rscratch) ; ! orcc (Rbox, Rscratch, Rbox) ; ! brx (Assembler::notZero, false, Assembler::pn, done) ; delayed()-> ! ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); ! ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); ! orcc (Rbox, Rscratch, G0) ; if (EmitSync & 65536) { Label LSucc ; ! brx (Assembler::notZero, false, Assembler::pn, LSucc) ; ! delayed()->nop() ; ! ba (done, false) ; ! delayed()-> ! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); ! bind (LSucc) ; ! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); ! if (os::is_MP()) { membar (StoreLoad) ; } ! ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); ! andcc (Rscratch, Rscratch, G0) ; ! brx (Assembler::notZero, false, Assembler::pt, done) ; ! delayed()-> andcc (G0, G0, G0) ; ! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; ! mov (G2_thread, Rscratch) ; ! casn (Rmark, G0, Rscratch) ; // invert icc.zf and goto done ! br_notnull(Rscratch, false, Assembler::pt, done, false) ; ! delayed() -> cmp (G0, G0) ; ! ba (done, false); ! delayed() -> cmp (G0, 1) ; } else { ! brx (Assembler::notZero, false, Assembler::pn, done) ; ! delayed()->nop() ; ! ba (done, false) ; ! delayed()-> ! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); } ! bind (LStacked) ; // Consider: we could replace the expensive CAS in the exit // path with a simple ST of the displaced mark value fetched from // the on-stack basiclock box. That admits a race where a thread T2 // in the slow lock path -- inflating with monitor M -- could race a // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. --- 3848,3896 ---- // past the store that releases the lock. But TSO is a strong memory model // and that particular flavor of barrier is a noop, so we can safely elide it. // Note that we use 1-0 locking by default for the inflated case. We // close the resultant (and rare) race by having contented threads in // monitorenter periodically poll _owner. ! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); ! ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); ! xor3(Rscratch, G2_thread, Rscratch); ! orcc(Rbox, Rscratch, Rbox); ! brx(Assembler::notZero, false, Assembler::pn, done); delayed()-> ! ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); ! ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); ! orcc(Rbox, Rscratch, G0); if (EmitSync & 65536) { Label LSucc ; ! brx(Assembler::notZero, false, Assembler::pn, LSucc); ! delayed()->nop(); ! ba(done); ! delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); ! bind(LSucc); ! st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); ! if (os::is_MP()) { membar (StoreLoad); } ! ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); ! andcc(Rscratch, Rscratch, G0); ! brx(Assembler::notZero, false, Assembler::pt, done); ! delayed()->andcc(G0, G0, G0); ! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); ! mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); // invert icc.zf and goto done ! br_notnull(Rscratch, false, Assembler::pt, done); ! delayed()->cmp(G0, G0); ! ba(done); ! delayed()->cmp(G0, 1); } else { ! brx(Assembler::notZero, false, Assembler::pn, done); ! delayed()->nop(); ! ba(done); ! delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); } ! bind (LStacked); // Consider: we could replace the expensive CAS in the exit // path with a simple ST of the displaced mark value fetched from // the on-stack basiclock box. That admits a race where a thread T2 // in the slow lock path -- inflating with monitor M -- could race a // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
*** 3880,3894 **** // lost-update "stomp" WAW race but detects and recovers as needed. // // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. ! casn (mark_addr.base(), Rbox, Rscratch) ; ! cmp (Rbox, Rscratch); // Intentional fall through into done ... ! bind (done) ; } void MacroAssembler::print_CPU_state() { --- 3915,3929 ---- // lost-update "stomp" WAW race but detects and recovers as needed. // // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. ! casn(mark_addr.base(), Rbox, Rscratch); ! cmp(Rbox, Rscratch); // Intentional fall through into done ... ! bind(done); } void MacroAssembler::print_CPU_state() {
*** 3940,3964 **** save_frame(0); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); or3(t1, t2, t3); ! cmp_and_br(t1, t2, Assembler::greaterEqual, false, Assembler::pn, next); stop("assert(top >= start)"); should_not_reach_here(); bind(next); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); or3(t3, t2, t3); ! cmp_and_br(t1, t2, Assembler::lessEqual, false, Assembler::pn, next2); stop("assert(top <= end)"); should_not_reach_here(); bind(next2); and3(t3, MinObjAlignmentInBytesMask, t3); ! cmp_and_br(t3, 0, Assembler::lessEqual, false, Assembler::pn, ok); stop("assert(aligned)"); should_not_reach_here(); bind(ok); restore(); --- 3975,3999 ---- save_frame(0); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); or3(t1, t2, t3); ! cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next); stop("assert(top >= start)"); should_not_reach_here(); bind(next); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); or3(t3, t2, t3); ! cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2); stop("assert(top <= end)"); should_not_reach_here(); bind(next2); and3(t3, MinObjAlignmentInBytesMask, t3); ! cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok); stop("assert(aligned)"); should_not_reach_here(); bind(ok); restore();
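
The verify_tlab hunk above switches its three assertion branches to cmp_and_br_short. Restated as plain C++ (an illustrative standalone function, not HotSpot code), the invariants it checks are:

    #include <cstdint>

    // Illustrative restatement of the checks above: start <= top <= end, and
    // the OR of the three pointers must be object-aligned (the emitted code
    // ORs them into t3 and masks with MinObjAlignmentInBytesMask).
    bool tlab_invariants_hold(uintptr_t start, uintptr_t top, uintptr_t end,
                              uintptr_t min_obj_alignment_mask) {
      if (top < start) return false;                               // "assert(top >= start)"
      if (top > end)   return false;                               // "assert(top <= end)"
      return ((start | top | end) & min_obj_alignment_mask) == 0;  // "assert(aligned)"
    }
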
*** 3980,3990 **** assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. ! ba(slow_case); } else { // get eden boundaries // note: we need both top & top_addr! const Register top_addr = t1; const Register end = t2; --- 4015,4025 ---- assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. ! ba_short(slow_case); } else { // get eden boundaries // note: we need both top & top_addr! const Register top_addr = t1; const Register end = t2;
*** 4114,4124 **** assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); Label do_refill, discard_tlab; if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. ! ba(slow_case); } ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); --- 4149,4159 ---- assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); Label do_refill, discard_tlab; if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. ! ba_short(slow_case); } ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
*** 4139,4149 **** // increment number of slow_allocations ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); add(t2, 1, t2); stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); } ! ba(try_eden); bind(discard_tlab); if (TLABStats) { // increment number of refills ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); --- 4174,4184 ---- // increment number of slow_allocations ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); add(t2, 1, t2); stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); } ! ba_short(try_eden); bind(discard_tlab); if (TLABStats) { // increment number of refills ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
*** 4155,4165 **** stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); } // if tlab is currently allocated (top or end != null) then // fill [top, end + alignment_reserve) with array object ! br_null(top, false, Assembler::pn, do_refill); set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word // set klass to intArrayKlass sub(t1, typeArrayOopDesc::header_size(T_INT), t1); --- 4190,4200 ---- stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); } // if tlab is currently allocated (top or end != null) then // fill [top, end + alignment_reserve) with array object ! br_null_short(top, Assembler::pn, do_refill); set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word // set klass to intArrayKlass sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
*** 4190,4200 **** // check that tlab_size (t1) is still valid { Label ok; ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); sll_ptr(t2, LogHeapWordSize, t2); ! cmp_and_br(t1, t2, Assembler::equal, false, Assembler::pn, ok); stop("assert(t1 == tlab_size)"); should_not_reach_here(); bind(ok); } --- 4225,4235 ---- // check that tlab_size (t1) is still valid { Label ok; ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); sll_ptr(t2, LogHeapWordSize, t2); ! cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok); stop("assert(t1 == tlab_size)"); should_not_reach_here(); bind(ok); }
*** 4201,4211 **** #endif // ASSERT add(top, t1, top); // t1 is tlab_size sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); verify_tlab(); ! ba(retry); } void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) { // Bump total bytes allocated by this thread --- 4236,4246 ---- #endif // ASSERT add(top, t1, top); // t1 is tlab_size sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); verify_tlab(); ! ba_short(retry); } void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) { // Bump total bytes allocated by this thread
*** 4352,4362 **** "check sizes in assembly below"); __ bind(restart); __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); ! __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill, false); // If the branch is taken, no harm in executing this in the delay slot. __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 --- 4387,4397 ---- "check sizes in assembly below"); __ bind(restart); __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); ! __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); // If the branch is taken, no harm in executing this in the delay slot. __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0
*** 4467,4476 **** --- 4502,4512 ---- tmp); } // Check on whether to annul. br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); // Do we need to load the previous value? if (obj != noreg) { // Load the previous value... if (index == noreg) {
*** 4490,4499 **** --- 4526,4536 ---- assert(pre_val != noreg, "must have a real register"); // Is the previous value null? // Check on whether to annul. br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed()->nop(); // OK, it's not filtered, so we'll need to call enqueue. In the normal // case, pre_val will be a scratch G-reg, but there are some cases in // which it's an O-reg. In the first case, do a normal call. In the // latter, do a save here and call the frameless version.