src/cpu/sparc/vm/macroAssembler_sparc.cpp

Print this page

        

*** 116,126 **** switch (inv_op2(inst)) { case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; - case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case bpr_op2: { if (is_cbcond(inst)) { m = wdisp10(word_aligned_ones, 0); v = wdisp10(dest_pos, inst_pos); } else { --- 116,125 ----
*** 147,157 **** switch (inv_op2(inst)) { case fbp_op2: r = inv_wdisp( inst, pos, 19); break; case bp_op2: r = inv_wdisp( inst, pos, 19); break; case fb_op2: r = inv_wdisp( inst, pos, 22); break; case br_op2: r = inv_wdisp( inst, pos, 22); break; - case cb_op2: r = inv_wdisp( inst, pos, 22); break; case bpr_op2: { if (is_cbcond(inst)) { r = inv_wdisp10(inst, pos); } else { r = inv_wdisp16(inst, pos); --- 146,155 ----
*** 323,338 **** // We want to use ST_BREAKPOINT here, but the debugger is confused by it. void MacroAssembler::breakpoint_trap() { trap(ST_RESERVED_FOR_USER_0); } - // flush windows (except current) using flushw instruction if avail. - void MacroAssembler::flush_windows() { - if (VM_Version::v9_instructions_work()) flushw(); - else flush_windows_trap(); - } - // Write serialization page so VM thread can do a pseudo remote membar // We use the current thread pointer to calculate a thread specific // offset to write to within the page. This minimizes bus traffic // due to cache line collision. void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { --- 321,330 ----
*** 357,444 **** void MacroAssembler::leave() { Unimplemented(); } void MacroAssembler::mult(Register s1, Register s2, Register d) { - if(VM_Version::v9_instructions_work()) { mulx (s1, s2, d); - } else { - smul (s1, s2, d); - } } void MacroAssembler::mult(Register s1, int simm13a, Register d) { - if(VM_Version::v9_instructions_work()) { mulx (s1, simm13a, d); - } else { - smul (s1, simm13a, d); - } - } - - - #ifdef ASSERT - void MacroAssembler::read_ccr_v8_assert(Register ccr_save) { - const Register s1 = G3_scratch; - const Register s2 = G4_scratch; - Label get_psr_test; - // Get the condition codes the V8 way. - read_ccr_trap(s1); - mov(ccr_save, s2); - // This is a test of V8 which has icc but not xcc - // so mask off the xcc bits - and3(s2, 0xf, s2); - // Compare condition codes from the V8 and V9 ways. - subcc(s2, s1, G0); - br(Assembler::notEqual, true, Assembler::pt, get_psr_test); - delayed()->breakpoint_trap(); - bind(get_psr_test); - } - - void MacroAssembler::write_ccr_v8_assert(Register ccr_save) { - const Register s1 = G3_scratch; - const Register s2 = G4_scratch; - Label set_psr_test; - // Write out the saved condition codes the V8 way - write_ccr_trap(ccr_save, s1, s2); - // Read back the condition codes using the V9 instruction - rdccr(s1); - mov(ccr_save, s2); - // This is a test of V8 which has icc but not xcc - // so mask off the xcc bits - and3(s2, 0xf, s2); - and3(s1, 0xf, s1); - // Compare the V8 way with the V9 way. - subcc(s2, s1, G0); - br(Assembler::notEqual, true, Assembler::pt, set_psr_test); - delayed()->breakpoint_trap(); - bind(set_psr_test); - } - #else - #define read_ccr_v8_assert(x) - #define write_ccr_v8_assert(x) - #endif // ASSERT - - void MacroAssembler::read_ccr(Register ccr_save) { - if (VM_Version::v9_instructions_work()) { - rdccr(ccr_save); - // Test code sequence used on V8. Do not move above rdccr. - read_ccr_v8_assert(ccr_save); - } else { - read_ccr_trap(ccr_save); - } - } - - void MacroAssembler::write_ccr(Register ccr_save) { - if (VM_Version::v9_instructions_work()) { - // Test code sequence used on V8. Do not move below wrccr. - write_ccr_v8_assert(ccr_save); - wrccr(ccr_save); - } else { - const Register temp_reg1 = G3_scratch; - const Register temp_reg2 = G4_scratch; - write_ccr_trap(ccr_save, temp_reg1, temp_reg2); - } } // Calls to C land --- 349,363 ----
*** 1325,1346 **** s->cr(); } void RegistersForDebugging::save_registers(MacroAssembler* a) { a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0); ! a->flush_windows(); int i; for (i = 0; i < 8; ++i) { a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, i_offset(i)); a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, l_offset(i)); a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i)); a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i)); } for (i = 0; i < 32; ++i) { a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i)); } ! for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) { a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i)); } } void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) { --- 1244,1265 ---- s->cr(); } void RegistersForDebugging::save_registers(MacroAssembler* a) { a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0); ! a->flushw(); int i; for (i = 0; i < 8; ++i) { a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, i_offset(i)); a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, l_offset(i)); a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i)); a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i)); } for (i = 0; i < 32; ++i) { a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i)); } ! for (i = 0; i < 64; i += 2) { a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i)); } } void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
*** 1348,1358 **** a->ld_ptr(r, g_offset(i), as_gRegister(i)); } for (int j = 0; j < 32; ++j) { a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j)); } ! for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) { a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k)); } } --- 1267,1277 ---- a->ld_ptr(r, g_offset(i), as_gRegister(i)); } for (int j = 0; j < 32; ++j) { a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j)); } ! for (int k = 0; k < 64; k += 2) { a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k)); } }
*** 1463,1474 **** // receives an oop in O0. Must restore O0 & O7 from TLS. Must not smash ANY // registers, including flags. May not use a register 'save', as this blows // the high bits of the O-regs if they contain Long values. Acts as a 'leaf' // call. void MacroAssembler::verify_oop_subroutine() { - assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" ); - // Leaf call; no frame. Label succeed, fail, null_or_fail; // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home). // O0 is now the oop to be checked. O7 is the return address. --- 1382,1391 ----
*** 1868,1897 **** // Other notes: The first move in each triplet can be unconditional // (and therefore probably prefetchable). // And the equals case for the high part does not need testing, // since that triplet is reached only after finding the high halves differ. - if (VM_Version::v9_instructions_work()) { mov(-1, Rresult); ! ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); ! } else { ! br(less, true, pt, done); delayed()-> set(-1, Rresult); ! br(greater, true, pt, done); delayed()-> set( 1, Rresult); ! } ! bind( check_low_parts ); - if (VM_Version::v9_instructions_work()) { mov( -1, Rresult); movcc(equal, false, icc, 0, Rresult); movcc(greaterUnsigned, false, icc, 1, Rresult); ! } else { ! set(-1, Rresult); ! br(equal, true, pt, done); delayed()->set( 0, Rresult); ! br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); ! } ! bind( done ); } void MacroAssembler::lneg( Register Rhi, Register Rlow ) { subcc( G0, Rlow, Rlow ); subc( G0, Rhi, Rhi ); --- 1785,1805 ---- // Other notes: The first move in each triplet can be unconditional // (and therefore probably prefetchable). // And the equals case for the high part does not need testing, // since that triplet is reached only after finding the high halves differ. mov(-1, Rresult); ! ba(done); ! delayed()->movcc(greater, false, icc, 1, Rresult); ! bind(check_low_parts); mov( -1, Rresult); movcc(equal, false, icc, 0, Rresult); movcc(greaterUnsigned, false, icc, 1, Rresult); ! ! bind(done); } void MacroAssembler::lneg( Register Rhi, Register Rlow ) { subcc( G0, Rlow, Rlow ); subc( G0, Rhi, Rhi );
*** 2115,2236 **** void MacroAssembler::float_cmp( bool is_float, int unordered_result, FloatRegister Fa, FloatRegister Fb, Register Rresult) { ! ! fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb); ! ! Condition lt = unordered_result == -1 ? f_unorderedOrLess : f_less; ! Condition eq = f_equal; ! Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater; ! ! if (VM_Version::v9_instructions_work()) { ! ! mov(-1, Rresult); ! movcc(eq, true, fcc0, 0, Rresult); ! movcc(gt, true, fcc0, 1, Rresult); ! } else { ! Label done; ! ! set( -1, Rresult ); ! //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); ! fb( eq, true, pn, done); delayed()->set( 0, Rresult ); ! fb( gt, true, pn, done); delayed()->set( 1, Rresult ); ! ! bind (done); } - } - ! void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) ! { ! if (VM_Version::v9_instructions_work()) { ! Assembler::fneg(w, s, d); ! } else { ! if (w == FloatRegisterImpl::S) { ! Assembler::fneg(w, s, d); ! } else if (w == FloatRegisterImpl::D) { ! // number() does a sanity check on the alignment. ! assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && ! ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); ! ! Assembler::fneg(FloatRegisterImpl::S, s, d); ! Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); ! } else { ! assert(w == FloatRegisterImpl::Q, "Invalid float register width"); ! ! // number() does a sanity check on the alignment. ! assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && ! ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); ! ! Assembler::fneg(FloatRegisterImpl::S, s, d); ! Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); ! Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); ! Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); ! } ! } ! } ! ! void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) ! { ! if (VM_Version::v9_instructions_work()) { ! Assembler::fmov(w, s, d); ! } else { ! if (w == FloatRegisterImpl::S) { ! Assembler::fmov(w, s, d); ! } else if (w == FloatRegisterImpl::D) { ! // number() does a sanity check on the alignment. ! assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && ! ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); ! ! Assembler::fmov(FloatRegisterImpl::S, s, d); ! Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); } else { ! assert(w == FloatRegisterImpl::Q, "Invalid float register width"); ! ! // number() does a sanity check on the alignment. ! assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && ! ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); ! ! Assembler::fmov(FloatRegisterImpl::S, s, d); ! Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); ! Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); ! Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); ! } } } - void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) - { - if (VM_Version::v9_instructions_work()) { - Assembler::fabs(w, s, d); - } else { - if (w == FloatRegisterImpl::S) { - Assembler::fabs(w, s, d); - } else if (w == FloatRegisterImpl::D) { - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); - - Assembler::fabs(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - } else { - assert(w == FloatRegisterImpl::Q, "Invalid float register width"); - - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); - - Assembler::fabs(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); - } - } - } void MacroAssembler::save_all_globals_into_locals() { mov(G1,L1); mov(G2,L2); mov(G3,L3); --- 2023,2049 ---- void MacroAssembler::float_cmp( bool is_float, int unordered_result, FloatRegister Fa, FloatRegister Fb, Register Rresult) { ! if (is_float) { ! fcmp(FloatRegisterImpl::S, fcc0, Fa, Fb); } else { ! fcmp(FloatRegisterImpl::D, fcc0, Fa, Fb); } ! if (unordered_result == 1) { ! mov( -1, Rresult); ! movcc(f_equal, true, fcc0, 0, Rresult); ! movcc(f_unorderedOrGreater, true, fcc0, 1, Rresult); } else { ! mov( -1, Rresult); ! movcc(f_equal, true, fcc0, 0, Rresult); ! movcc(f_greater, true, fcc0, 1, Rresult); } } void MacroAssembler::save_all_globals_into_locals() { mov(G1,L1); mov(G2,L2); mov(G3,L3);
*** 2248,2386 **** mov(L5,G5); mov(L6,G6); mov(L7,G7); } - // Use for 64 bit operation. - void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) - { - // store ptr_reg as the new top value - #ifdef _LP64 - casx(top_ptr_reg, top_reg, ptr_reg); - #else - cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm); - #endif // _LP64 - } - - // [RGV] This routine does not handle 64 bit operations. - // use casx_under_lock() or casx directly!!! - void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) - { - // store ptr_reg as the new top value - if (VM_Version::v9_instructions_work()) { - cas(top_ptr_reg, top_reg, ptr_reg); - } else { - - // If the register is not an out nor global, it is not visible - // after the save. Allocate a register for it, save its - // value in the register save area (the save may not flush - // registers to the save area). - - Register top_ptr_reg_after_save; - Register top_reg_after_save; - Register ptr_reg_after_save; - - if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) { - top_ptr_reg_after_save = top_ptr_reg->after_save(); - } else { - Address reg_save_addr = top_ptr_reg->address_in_saved_window(); - top_ptr_reg_after_save = L0; - st(top_ptr_reg, reg_save_addr); - } - - if (top_reg->is_out() || top_reg->is_global()) { - top_reg_after_save = top_reg->after_save(); - } else { - Address reg_save_addr = top_reg->address_in_saved_window(); - top_reg_after_save = L1; - st(top_reg, reg_save_addr); - } - - if (ptr_reg->is_out() || ptr_reg->is_global()) { - ptr_reg_after_save = ptr_reg->after_save(); - } else { - Address reg_save_addr = ptr_reg->address_in_saved_window(); - ptr_reg_after_save = L2; - st(ptr_reg, reg_save_addr); - } - - const Register& lock_reg = L3; - const Register& lock_ptr_reg = L4; - const Register& value_reg = L5; - const Register& yield_reg = L6; - const Register& yieldall_reg = L7; - - save_frame(); - - if (top_ptr_reg_after_save == L0) { - ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save); - } - - if (top_reg_after_save == L1) { - ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save); - } - - if (ptr_reg_after_save == L2) { - ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save); - } - - Label(retry_get_lock); - Label(not_same); - Label(dont_yield); - - assert(lock_addr, "lock_address should be non null for v8"); - set((intptr_t)lock_addr, lock_ptr_reg); - // Initialize yield counter - mov(G0,yield_reg); - mov(G0, yieldall_reg); - set(StubRoutines::Sparc::locked, lock_reg); - - bind(retry_get_lock); - cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); - - if(use_call_vm) { - Untested("Need to verify global reg consistancy"); - call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); - } else { - // Save the regs and make space for a C call - save(SP, -96, SP); - save_all_globals_into_locals(); - call(CAST_FROM_FN_PTR(address,os::yield_all)); - delayed()->mov(yieldall_reg, O0); - restore_globals_from_locals(); - restore(); - } - - // reset the counter - mov(G0,yield_reg); - add(yieldall_reg, 1, yieldall_reg); - - bind(dont_yield); - // try to get lock - Assembler::swap(lock_ptr_reg, 0, lock_reg); - - // did we get the lock? - cmp(lock_reg, StubRoutines::Sparc::unlocked); - br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); - delayed()->add(yield_reg,1,yield_reg); - - // yes, got lock. do we have the same top? - ld(top_ptr_reg_after_save, 0, value_reg); - cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); - - // yes, same top. - st(ptr_reg_after_save, top_ptr_reg_after_save, 0); - membar(Assembler::StoreStore); - - bind(not_same); - mov(value_reg, ptr_reg_after_save); - st(lock_reg, lock_ptr_reg, 0); // unlock - - restore(); - } - } - RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) { intptr_t value = *delayed_value_addr; if (value != 0) --- 2061,2070 ----
*** 2968,2978 **** // don't accidentally blow away another thread's valid bias. delayed()->and3(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, mark_reg); or3(G2_thread, mark_reg, temp_reg); ! casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. cmp(mark_reg, temp_reg); --- 2652,2662 ---- // don't accidentally blow away another thread's valid bias. delayed()->and3(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, mark_reg); or3(G2_thread, mark_reg, temp_reg); ! cas_ptr(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. cmp(mark_reg, temp_reg);
*** 2996,3006 **** // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); ! casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. cmp(mark_reg, temp_reg); --- 2680,2690 ---- // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); ! cas_ptr(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. cmp(mark_reg, temp_reg);
*** 3025,3035 **** // // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); ! casn(mark_addr.base(), mark_reg, temp_reg); // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in // removing the bias bit from the object's header. if (counters != NULL) { cmp(mark_reg, temp_reg); --- 2709,2719 ---- // // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); ! cas_ptr(mark_addr.base(), mark_reg, temp_reg); // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in // removing the bias bit from the object's header. if (counters != NULL) { cmp(mark_reg, temp_reg);
*** 3056,3074 **** nop(); } } - // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by - // Solaris/SPARC's "as". Another apt name would be cas_ptr() - - void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { - casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - } - - - // compiler_lock_object() and compiler_unlock_object() are direct transliterations // of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments. // The code could be tightened up considerably. // // box->dhw disposition - post-conditions at DONE_LABEL. --- 2740,2749 ----
*** 3127,3138 **** // Initialize the box. (Must happen before we update the object mark!) st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); // compare object markOop with Rmark and if equal exchange Rscratch with object markOop assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casx_under_lock(mark_addr.base(), Rmark, Rscratch, ! (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); // if compare/exchange succeeded we found an unlocked object and we now have locked it // hence we are done cmp(Rmark, Rscratch); #ifdef _LP64 --- 2802,2812 ---- // Initialize the box. (Must happen before we update the object mark!) st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); // compare object markOop with Rmark and if equal exchange Rscratch with object markOop assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! cas_ptr(mark_addr.base(), Rmark, Rscratch); // if compare/exchange succeeded we found an unlocked object and we now have locked it // hence we are done cmp(Rmark, Rscratch); #ifdef _LP64
*** 3174,3184 **** // Try stack-lock acquisition. // Beware: the 1st instruction is in a delay slot mov(Rbox, Rscratch); or3(Rmark, markOopDesc::unlocked_value, Rmark); assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn(mark_addr.base(), Rmark, Rscratch); cmp(Rmark, Rscratch); brx(Assembler::equal, false, Assembler::pt, done); delayed()->sub(Rscratch, SP, Rscratch); // Stack-lock attempt failed - check for recursive stack-lock. --- 2848,2858 ---- // Try stack-lock acquisition. // Beware: the 1st instruction is in a delay slot mov(Rbox, Rscratch); or3(Rmark, markOopDesc::unlocked_value, Rmark); assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! cas_ptr(mark_addr.base(), Rmark, Rscratch); cmp(Rmark, Rscratch); brx(Assembler::equal, false, Assembler::pt, done); delayed()->sub(Rscratch, SP, Rscratch); // Stack-lock attempt failed - check for recursive stack-lock.
*** 3205,3215 **** // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); cmp(Rscratch, G0); // Intentional fall-through into done } else { // Aggressively avoid the Store-before-CAS penalty // Defer the store into box->dhw until after the CAS --- 2879,2889 ---- // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! cas_ptr(Rmark, G0, Rscratch); cmp(Rscratch, G0); // Intentional fall-through into done } else { // Aggressively avoid the Store-before-CAS penalty // Defer the store into box->dhw until after the CAS
*** 3238,3248 **** // This presumes TSO, of course. mov(0, Rscratch); or3(Rmark, markOopDesc::unlocked_value, Rmark); assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casn(mark_addr.base(), Rmark, Rscratch); // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); cmp(Rscratch, Rmark); brx(Assembler::notZero, false, Assembler::pn, Recursive); delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); if (counters != NULL) { --- 2912,2922 ---- // This presumes TSO, of course. mov(0, Rscratch); or3(Rmark, markOopDesc::unlocked_value, Rmark); assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! cas_ptr(mark_addr.base(), Rmark, Rscratch); // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); cmp(Rscratch, Rmark); brx(Assembler::notZero, false, Assembler::pn, Recursive); delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); if (counters != NULL) {
*** 3264,3274 **** // A brief experiment - requiring changes to synchronizer.cpp, interpreter, // and showed a performance *increase*. In the same experiment I eliminated // the fast-path stack-lock code from the interpreter and always passed // control to the "slow" operators in synchronizer.cpp. ! // RScratch contains the fetched obj->mark value from the failed CASN. #ifdef _LP64 sub(Rscratch, STACK_BIAS, Rscratch); #endif sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); --- 2938,2948 ---- // A brief experiment - requiring changes to synchronizer.cpp, interpreter, // and showed a performance *increase*. In the same experiment I eliminated // the fast-path stack-lock code from the interpreter and always passed // control to the "slow" operators in synchronizer.cpp. ! // RScratch contains the fetched obj->mark value from the failed CAS. #ifdef _LP64 sub(Rscratch, STACK_BIAS, Rscratch); #endif sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
*** 3298,3308 **** // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); cmp(Rscratch, G0); // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); --- 2972,2982 ---- // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! cas_ptr(Rmark, G0, Rscratch); cmp(Rscratch, G0); // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
*** 3334,3345 **** br_null_short(Rmark, Assembler::pt, done); // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! casx_under_lock(mark_addr.base(), Rbox, Rmark, ! (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); ba(done); delayed()->cmp(Rbox, Rmark); bind(done); return ; } --- 3008,3018 ---- br_null_short(Rmark, Assembler::pt, done); // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object assert(mark_addr.disp() == 0, "cas must take a zero displacement"); ! cas_ptr(mark_addr.base(), Rbox, Rmark); ba(done); delayed()->cmp(Rbox, Rmark); bind(done); return ; }
*** 3396,3406 **** andcc(Rscratch, Rscratch, G0); brx(Assembler::notZero, false, Assembler::pt, done); delayed()->andcc(G0, G0, G0); add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! casn(Rmark, G0, Rscratch); // invert icc.zf and goto done br_notnull(Rscratch, false, Assembler::pt, done); delayed()->cmp(G0, G0); ba(done); delayed()->cmp(G0, 1); --- 3069,3079 ---- andcc(Rscratch, Rscratch, G0); brx(Assembler::notZero, false, Assembler::pt, done); delayed()->andcc(G0, G0, G0); add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); mov(G2_thread, Rscratch); ! cas_ptr(Rmark, G0, Rscratch); // invert icc.zf and goto done br_notnull(Rscratch, false, Assembler::pt, done); delayed()->cmp(G0, G0); ba(done); delayed()->cmp(G0, 1);
*** 3438,3448 **** // lost-update "stomp" WAW race but detects and recovers as needed. // // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. ! casn(mark_addr.base(), Rbox, Rscratch); cmp(Rbox, Rscratch); // Intentional fall through into done ... bind(done); } --- 3111,3121 ---- // lost-update "stomp" WAW race but detects and recovers as needed. // // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. ! cas_ptr(mark_addr.base(), Rbox, Rscratch); cmp(Rbox, Rscratch); // Intentional fall through into done ... bind(done); }
*** 3581,3591 **** delayed()->add(obj, con_size_in_bytes, end); } // Compare obj with the value at top_addr; if still equal, swap the value of // end with the value at top_addr. If not equal, read the value at top_addr // into end. ! casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); // if someone beat us on the allocation, try again, otherwise continue cmp(obj, end); brx(Assembler::notEqual, false, Assembler::pn, retry); delayed()->mov(end, obj); // nop if successfull since obj == end --- 3254,3264 ---- delayed()->add(obj, con_size_in_bytes, end); } // Compare obj with the value at top_addr; if still equal, swap the value of // end with the value at top_addr. If not equal, read the value at top_addr // into end. ! cas_ptr(top_addr, obj, end); // if someone beat us on the allocation, try again, otherwise continue cmp(obj, end); brx(Assembler::notEqual, false, Assembler::pn, retry); delayed()->mov(end, obj); // nop if successfull since obj == end