7063628_1 Cdiff src/cpu/sparc/vm/assembler_sparc.cpp
*** 104,114 ****
case fbp_op2: s = "fbp"; break;
case br_op2: s = "br"; break;
case bp_op2: s = "bp"; break;
case cb_op2: s = "cb"; break;
case bpr_op2: {
! if (is_cbc(inst)) {
s = is_cxb(inst) ? "cxb" : "cwb";
} else {
s = "bpr";
}
break;
--- 104,114 ----
case fbp_op2: s = "fbp"; break;
case br_op2: s = "br"; break;
case bp_op2: s = "bp"; break;
case cb_op2: s = "cb"; break;
case bpr_op2: {
! if (is_cbcond(inst)) {
s = is_cxb(inst) ? "cxb" : "cwb";
} else {
s = "bpr";
}
break;
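cbcond shares the bpr op2 encoding, so is_cbcond() must look deeper into the instruction word before picking a name. "cwb" and "cxb" stand for the 32-bit and 64-bit compare-and-branch families; the per-condition assembly mnemonics are presumably spelled cwbe, cwbne, cxbe, and so on.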
*** 138,148 ****
case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break;
case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case bpr_op2: {
! if (is_cbc(inst)) {
m = wdisp10(word_aligned_ones, 0);
v = wdisp10(dest_pos, inst_pos);
} else {
m = wdisp16(word_aligned_ones, 0);
v = wdisp16(dest_pos, inst_pos);
--- 138,148 ----
case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break;
case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
case bpr_op2: {
! if (is_cbcond(inst)) {
m = wdisp10(word_aligned_ones, 0);
v = wdisp10(dest_pos, inst_pos);
} else {
m = wdisp16(word_aligned_ones, 0);
v = wdisp16(dest_pos, inst_pos);
*** 169,179 ****
case bp_op2: r = inv_wdisp( inst, pos, 19); break;
case fb_op2: r = inv_wdisp( inst, pos, 22); break;
case br_op2: r = inv_wdisp( inst, pos, 22); break;
case cb_op2: r = inv_wdisp( inst, pos, 22); break;
case bpr_op2: {
! if (is_cbc(inst)) {
r = inv_wdisp10(inst, pos);
} else {
r = inv_wdisp16(inst, pos);
}
break;
--- 169,179 ----
case bp_op2: r = inv_wdisp( inst, pos, 19); break;
case fb_op2: r = inv_wdisp( inst, pos, 22); break;
case br_op2: r = inv_wdisp( inst, pos, 22); break;
case cb_op2: r = inv_wdisp( inst, pos, 22); break;
case bpr_op2: {
! if (is_cbcond(inst)) {
r = inv_wdisp10(inst, pos);
} else {
r = inv_wdisp16(inst, pos);
}
break;
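These field widths are the whole story on reach: wdisp10 is a signed 10-bit word displacement (about +/-2KB), where bpr's wdisp16 reaches about +/-128KB. A minimal sketch of the distance test a predicate like use_cbcond(L) has to make, assuming it only commits to cbcond for a bound, in-range label (illustrative only, not the actual implementation):

   bool MacroAssembler::use_cbcond(Label& L) {
     if (!UseCBCond)    return false;   // assumed flag gating T4 cbcond support
     if (!L.is_bound()) return false;   // sketch: stay conservative on forward labels
     intptr_t delta = (intptr_t)target(L) - (intptr_t)pc();
     return is_simm(delta >> 2, 10);    // must fit the 10-bit word displacement
   }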
*** 989,999 ****
#ifdef ASSERT
// Verify that flags was zeroed on return to Java
Label PcOk;
save_frame(0); // to avoid clobbering O0
ld_ptr(pc_addr, L0);
! br_null(L0, false, Assembler::pt, PcOk);
stop("last_Java_pc not zeroed before leaving Java");
bind(PcOk);
// Verify that flags was zeroed on return to Java
Label FlagsOk;
--- 989,999 ----
#ifdef ASSERT
// Verify that flags was zeroed on return to Java
Label PcOk;
save_frame(0); // to avoid clobbering O0
ld_ptr(pc_addr, L0);
! br_null_short(L0, Assembler::pt, PcOk);
stop("last_Java_pc not zeroed before leaving Java");
bind(PcOk);
// Verify that flags was zeroed on return to Java
Label FlagsOk;
*** 1114,1124 ****
check_and_handle_popframe(scratch_reg);
check_and_handle_earlyret(scratch_reg);
Address exception_addr(G2_thread, Thread::pending_exception_offset());
ld_ptr(exception_addr, scratch_reg);
! br_null(scratch_reg,false,pt,L);
// we use O7 linkage so that forward_exception_entry has the issuing PC
call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
delayed()->nop();
bind(L);
}
--- 1114,1124 ----
check_and_handle_popframe(scratch_reg);
check_and_handle_earlyret(scratch_reg);
Address exception_addr(G2_thread, Thread::pending_exception_offset());
ld_ptr(exception_addr, scratch_reg);
! br_null_short(scratch_reg, pt, L);
// we use O7 linkage so that forward_exception_entry has the issuing PC
call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
delayed()->nop();
bind(L);
}
*** 1888,1902 ****
set(Universe::verify_oop_mask (), O2_mask);
set(Universe::verify_oop_bits (), O3_bits);
// assert((obj & oop_mask) == oop_bits);
and3(O0_obj, O2_mask, O4_temp);
! cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, null_or_fail);
if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
// the null_or_fail case is useless; must test for null separately
! br_null(O0_obj, false, pn, succeed);
}
// Check the klassOop of this object for being in the right area of memory.
// Cannot do the load in the delay slot above in case O0 is null
load_klass(O0_obj, O0_obj);
--- 1888,1902 ----
set(Universe::verify_oop_mask (), O2_mask);
set(Universe::verify_oop_bits (), O3_bits);
// assert((obj & oop_mask) == oop_bits);
and3(O0_obj, O2_mask, O4_temp);
! cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
// the null_or_fail case is useless; must test for null separately
! br_null_short(O0_obj, pn, succeed);
}
// Check the klassOop of this object for being in the right area of memory.
// Cannot do the load in the delay slot above in case O0 is null
load_klass(O0_obj, O0_obj);
*** 1904,1914 ****
if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
set(Universe::verify_klass_mask(), O2_mask);
if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
set(Universe::verify_klass_bits(), O3_bits);
and3(O0_obj, O2_mask, O4_temp);
! cmp_and_brx(O4_temp, O3_bits, notEqual, false, pn, fail);
// Check the klass's klass
load_klass(O0_obj, O0_obj);
and3(O0_obj, O2_mask, O4_temp);
cmp(O4_temp, O3_bits);
brx(notEqual, false, pn, fail);
--- 1904,1914 ----
if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
set(Universe::verify_klass_mask(), O2_mask);
if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
set(Universe::verify_klass_bits(), O3_bits);
and3(O0_obj, O2_mask, O4_temp);
! cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
// Check the klass's klass
load_klass(O0_obj, O0_obj);
and3(O0_obj, O2_mask, O4_temp);
cmp(O4_temp, O3_bits);
brx(notEqual, false, pn, fail);
*** 2132,2150 ****
ShouldNotReachHere();
return Assembler::rc_z;
}
// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS
! void MacroAssembler::br_zero(Register s1, Label& L) {
! assert_not_delayed();
! if (use_cbc(L)) {
! Assembler::cbc(zero, icc, s1, 0, L);
! } else {
tst(s1);
! br (zero, false, pt, L);
! delayed()->nop();
! }
}
// Compares a pointer register with zero and branches on null.
// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
--- 2132,2144 ----
ShouldNotReachHere();
return Assembler::rc_z;
}
// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS
! void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
tst(s1);
! br (c, a, p, L);
}
// Compares a pointer register with zero and branches on null.
// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L, bool emit_delayed_nop ) {
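Note that the rewritten helper no longer hides the delay slot: the old br_zero() ended with delayed()->nop(), while cmp_zero_and_br() ends on the branch itself. A hypothetical call site (register and label names are illustrative) now reads:

   cmp_zero_and_br(Assembler::zero, Rcount, L_done, false, Assembler::pt);
   delayed()->nop();   // the caller now owns the delay slot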
*** 2226,2272 ****
br(c, a, p, L);
delayed()->nop();
}
}
! void MacroAssembler::cmp_and_br(Register s1, int simm13a, Condition c,
! bool a, Predict p, Label& L) {
assert_not_delayed();
! if (is_simm(simm13a,5) && use_cbc(L)) {
! Assembler::cbc(c, icc, s1, simm13a, L);
} else {
cmp(s1, simm13a);
! br(c, a, p, L);
delayed()->nop();
}
}
// Branch that tests xcc in LP64 and icc in !LP64
! void MacroAssembler::cmp_and_brx(Register s1, Register s2, Condition c,
! bool a, Predict p, Label& L) {
assert_not_delayed();
! if (use_cbc(L)) {
! Assembler::cbc(c, ptr_cc, s1, s2, L);
} else {
cmp(s1, s2);
! brx(c, a, p, L);
delayed()->nop();
}
}
! void MacroAssembler::cmp_and_brx(Register s1, int simm13a, Condition c,
! bool a, Predict p, Label& L) {
assert_not_delayed();
! if (is_simm(simm13a,5) && use_cbc(L)) {
! Assembler::cbc(c, ptr_cc, s1, simm13a, L);
} else {
cmp(s1, simm13a);
! brx(c, a, p, L);
delayed()->nop();
}
}
// instruction sequences factored across compiler & interpreter
void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
Register Rb_hi, Register Rb_low,
--- 2220,2313 ----
br(c, a, p, L);
delayed()->nop();
}
}
! // Compare integer (32 bit) values (icc only).
! void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
! Predict p, Label& L) {
assert_not_delayed();
! if (use_cbcond(L)) {
! Assembler::cbcond(c, icc, s1, s2, L);
} else {
+ cmp(s1, s2);
+ br(c, false, p, L);
+ delayed()->nop();
+ }
+ }
+
+ // Compare integer (32 bit) values (icc only).
+ void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
+ Predict p, Label& L) {
+ assert_not_delayed();
+ if (is_simm(simm13a,5) && use_cbcond(L)) {
+ Assembler::cbcond(c, icc, s1, simm13a, L);
+ } else {
cmp(s1, simm13a);
! br(c, false, p, L);
delayed()->nop();
}
}
// Branch that tests xcc in LP64 and icc in !LP64
! void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
! Predict p, Label& L) {
assert_not_delayed();
! if (use_cbcond(L)) {
! Assembler::cbcond(c, ptr_cc, s1, s2, L);
} else {
cmp(s1, s2);
! brx(c, false, p, L);
delayed()->nop();
}
}
! // Branch that tests xcc in LP64 and icc in !LP64
! void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
! Predict p, Label& L) {
assert_not_delayed();
! if (is_simm(simm13a,5) && use_cbcond(L)) {
! Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
} else {
cmp(s1, simm13a);
! brx(c, false, p, L);
delayed()->nop();
}
}
+ // Short branch versions for comparing a pointer with zero.
+
+ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
+ assert_not_delayed();
+ if (use_cbcond(L)) {
+ Assembler::cbcond(zero, ptr_cc, s1, 0, L);
+ return;
+ }
+ br_null(s1, false, p, L);
+ delayed()->nop();
+ }
+
+ void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
+ assert_not_delayed();
+ if (use_cbcond(L)) {
+ Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
+ return;
+ }
+ br_notnull(s1, false, p, L);
+ delayed()->nop();
+ }
+
+ // Unconditional short branch
+ void MacroAssembler::ba_short(Label& L) {
+ if (use_cbcond(L)) {
+ Assembler::cbcond(equal, icc, G0, G0, L);
+ return;
+ }
+ br(always, false, pt, L);
+ delayed()->nop();
+ }
+
// instruction sequences factored across compiler & interpreter
void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
Register Rb_hi, Register Rb_low,
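Taken together, the *_short helpers set the call-site contract used throughout the rest of the patch: the annul flag is gone and the delay slot is handled internally, either folded away by cbcond or filled with a nop. The is_simm(simm13a, 5) guard on the immediate overloads reflects cbcond's 5-bit immediate field, and ba_short() gets an always-taken cbcond by comparing G0 with itself. A sketch of the migration at a typical call site (names illustrative):

   // before: explicit annul bit, delay-slot nop emitted by the helper
   cmp_and_br(Rtemp, 0, Assembler::notEqual, false, Assembler::pn, L_slow);
   // after: cbcond when the target is in range, a br/nop pair otherwise
   cmp_and_br_short(Rtemp, 0, Assembler::notEqual, Assembler::pn, L_slow);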
*** 2288,2298 ****
// And the equals case for the high part does not need testing,
// since that triplet is reached only after finding the high halves differ.
if (VM_Version::v9_instructions_work()) {
mov(-1, Rresult);
! ba(done, false); delayed()-> movcc(greater, false, icc, 1, Rresult);
} else {
br(less, true, pt, done); delayed()-> set(-1, Rresult);
br(greater, true, pt, done); delayed()-> set( 1, Rresult);
}
--- 2329,2339 ----
// And the equals case for the high part does not need testing,
// since that triplet is reached only after finding the high halves differ.
if (VM_Version::v9_instructions_work()) {
mov(-1, Rresult);
! ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult);
} else {
br(less, true, pt, done); delayed()-> set(-1, Rresult);
br(greater, true, pt, done); delayed()-> set( 1, Rresult);
}
*** 2363,2373 ****
sll(Rin_high, Rcount, Rout_high);
if (Rcount == Rout_low) {
sll(Rin_low, Rcount, Rout_low); // low half
}
srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
! ba(done, false);
delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
sll(Rin_low, Ralt_count, Rout_high );
--- 2404,2414 ----
sll(Rin_high, Rcount, Rout_high);
if (Rcount == Rout_low) {
sll(Rin_low, Rcount, Rout_low); // low half
}
srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
! ba(done);
delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
sll(Rin_low, Ralt_count, Rout_high );
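For a shift count in [1,31] the intent here is:

   // effectively: Rout_high = (Rin_high << count) | (Rin_low >> (32 - count))

A single shift by (32 - count) would need a shift amount of 32 when count is 0, which the 32-bit shifter cannot express, so Rxfer_bits is presumably produced above this hunk with a shift by (31 - count), and the srl-by-one-more completes the job while keeping every shift amount in [0,31].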
*** 2424,2434 ****
sra(Rin_high, Rcount, Rout_high ); // high half
sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
if (Rcount == Rout_low) {
srl(Rin_low, Rcount, Rout_low);
}
! ba(done, false);
delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
--- 2465,2475 ----
sra(Rin_high, Rcount, Rout_high ); // high half
sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
if (Rcount == Rout_low) {
srl(Rin_low, Rcount, Rout_low);
}
! ba(done);
delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
*** 2487,2497 ****
srl(Rin_high, Rcount, Rout_high ); // high half
sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
if (Rcount == Rout_low) {
srl(Rin_low, Rcount, Rout_low);
}
! ba(done, false);
delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
--- 2528,2538 ----
srl(Rin_high, Rcount, Rout_high ); // high half
sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
if (Rcount == Rout_low) {
srl(Rin_low, Rcount, Rout_low);
}
! ba(done);
delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
// shift >= 32 bits, Ralt_count = Rcount-32
bind(big_shift);
*** 2751,2761 ****
mov(G0,yield_reg);
mov(G0, yieldall_reg);
set(StubRoutines::Sparc::locked, lock_reg);
bind(retry_get_lock);
! cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dont_yield);
if(use_call_vm) {
Untested("Need to verify global reg consistency");
call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
} else {
--- 2792,2802 ----
mov(G0,yield_reg);
mov(G0, yieldall_reg);
set(StubRoutines::Sparc::locked, lock_reg);
bind(retry_get_lock);
! cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
if(use_call_vm) {
Untested("Need to verify global reg consistency");
call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
} else {
*** 2781,2791 ****
br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
delayed()->add(yield_reg,1,yield_reg);
// yes, got lock. do we have the same top?
ld(top_ptr_reg_after_save, 0, value_reg);
! cmp_and_br(value_reg, top_reg_after_save, Assembler::notEqual, false, Assembler::pt, not_same);
// yes, same top.
st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
membar(Assembler::StoreStore);
--- 2822,2832 ----
br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
delayed()->add(yield_reg,1,yield_reg);
// yes, got lock. do we have the same top?
ld(top_ptr_reg_after_save, 0, value_reg);
! cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
// yes, same top.
st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
membar(Assembler::StoreStore);
*** 3031,3041 ****
L2, L3, L4, L5,
NULL, &L_pop_to_failure);
// on success:
restore();
! ba(L_success);
// on failure:
bind(L_pop_to_failure);
restore();
bind(L_failure);
--- 3072,3082 ----
L2, L3, L4, L5,
NULL, &L_pop_to_failure);
// on success:
restore();
! ba_short(L_success);
// on failure:
bind(L_pop_to_failure);
restore();
bind(L_failure);
*** 3110,3121 ****
// Otherwise, it's the slow path for us (no success at this point).
// Hacked ba(), which may only be used just before L_fallthrough.
#define FINAL_JUMP(label) \
if (&(label) != &L_fallthrough) { \
! ba(label, false); \
! delayed()->nop(); \
}
if (super_check_offset.is_register()) {
brx(Assembler::equal, false, Assembler::pn, *L_success);
delayed()->cmp(super_check_offset.as_register(), sc_offset);
--- 3151,3161 ----
// Otherwise, it's the slow path for us (no success at this point).
// Hacked ba(), which may only be used just before L_fallthrough.
#define FINAL_JUMP(label) \
if (&(label) != &L_fallthrough) { \
! ba(label); delayed()->nop(); \
}
if (super_check_offset.is_register()) {
brx(Assembler::equal, false, Assembler::pn, *L_success);
delayed()->cmp(super_check_offset.as_register(), sc_offset);
*** 3243,3253 ****
// Success. Cache the super we found and proceed in triumph.
st_ptr(super_klass, sub_klass, sc_offset);
if (L_success != &L_fallthrough) {
! ba(*L_success, false);
delayed()->nop();
}
bind(L_fallthrough);
}
--- 3283,3293 ----
// Success. Cache the super we found and proceed in triumph.
st_ptr(super_klass, sub_klass, sc_offset);
if (L_success != &L_fallthrough) {
! ba(*L_success);
delayed()->nop();
}
bind(L_fallthrough);
}
*** 3258,3268 ****
Label& wrong_method_type) {
assert_different_registers(mtype_reg, mh_reg, temp_reg);
// compare method type against that of the receiver
RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
load_heap_oop(mh_reg, mhtype_offset, temp_reg);
! cmp_and_brx(temp_reg, mtype_reg, Assembler::notEqual, false, Assembler::pn, wrong_method_type);
}
// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots. This field is either located directly
--- 3298,3308 ----
Label& wrong_method_type) {
assert_different_registers(mtype_reg, mh_reg, temp_reg);
// compare method type against that of the receiver
RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
load_heap_oop(mh_reg, mhtype_offset, temp_reg);
! cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
}
// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots. This field is either located directly
*** 3351,3361 ****
// whether the epoch is still valid
// Note that the runtime guarantees sufficient alignment of JavaThread
// pointers to allow age to be placed into low bits
assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
! cmp_and_brx(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, false, Assembler::pn, cas_label);
load_klass(obj_reg, temp_reg);
ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
or3(G2_thread, temp_reg, temp_reg);
xor3(mark_reg, temp_reg, temp_reg);
--- 3391,3401 ----
// whether the epoch is still valid
// Note that the runtime guarantees sufficient alignment of JavaThread
// pointers to allow age to be placed into low bits
assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
! cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
load_klass(obj_reg, temp_reg);
ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
or3(G2_thread, temp_reg, temp_reg);
xor3(mark_reg, temp_reg, temp_reg);
*** 3418,3428 ****
}
if (slow_case != NULL) {
brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
delayed()->nop();
}
! ba(done);
bind(try_rebias);
// At this point we know the epoch has expired, meaning that the
// current "bias owner", if any, is actually invalid. Under these
// circumstances _only_, we are allowed to use the current header's
--- 3458,3468 ----
}
if (slow_case != NULL) {
brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
delayed()->nop();
}
! ba_short(done);
bind(try_rebias);
// At this point we know the epoch has expired, meaning that the
// current "bias owner", if any, is actually invalid. Under these
// circumstances _only_, we are allowed to use the current header's
*** 3446,3456 ****
}
if (slow_case != NULL) {
brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
delayed()->nop();
}
! ba(done);
bind(try_revoke_bias);
// The prototype mark in the klass doesn't have the bias bit set any
// more, indicating that objects of this data type are not supposed
// to be biased any more. We are going to try to reset the mark of
--- 3486,3496 ----
}
if (slow_case != NULL) {
brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
delayed()->nop();
}
! ba_short(done);
bind(try_revoke_bias);
// The prototype mark in the klass doesn't have the bias bit set any
// more, indicating that objects of this data type are not supposed
// to be biased any more. We are going to try to reset the mark of
*** 3497,3507 ****
// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
// Solaris/SPARC's "as". Another apt name would be cas_ptr()
void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
! casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ;
}
// compiler_lock_object() and compiler_unlock_object() are direct transliterations
--- 3537,3547 ----
// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
// Solaris/SPARC's "as". Another apt name would be cas_ptr()
void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
! casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
}
// compiler_lock_object() and compiler_unlock_object() are direct transliterations
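casn() therefore behaves as a pointer-width cas_ptr(): set_reg goes in holding the new value and comes back holding the prior memory contents. The canonical pattern, used by compiler_lock_object() below, is CAS then compare (names taken from the surrounding code):

   mov(Rbox, Rscratch);                       // value to install
   casn(mark_addr.base(), Rmark, Rscratch);   // Rscratch <- old *mark_addr
   cmp(Rmark, Rscratch);                      // equal iff the CAS succeeded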
*** 3538,3550 ****
if (counters != NULL) {
inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
}
if (EmitSync & 1) {
! mov (3, Rscratch) ;
! st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! cmp (SP, G0) ;
return ;
}
if (EmitSync & 2) {
--- 3578,3590 ----
if (counters != NULL) {
inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
}
if (EmitSync & 1) {
! mov(3, Rscratch);
! st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! cmp(SP, G0);
return ;
}
if (EmitSync & 2) {
*** 3581,3600 ****
// we did not find an unlocked object so see if this is a recursive case
// sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! bind (done) ;
return ;
}
Label Egress ;
if (EmitSync & 256) {
Label IsInflated ;
! ld_ptr (mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
--- 3621,3640 ----
// we did not find an unlocked object so see if this is a recursive case
// sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! bind (done);
return ;
}
Label Egress ;
if (EmitSync & 256) {
Label IsInflated ;
! ld_ptr(mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
*** 3601,3698 ****
}
// Store mark into displaced mark field in the on-stack basic-lock "box"
// Critically, this must happen before the CAS
// Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
! st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
! andcc (Rmark, 2, G0) ;
! brx (Assembler::notZero, false, Assembler::pn, IsInflated) ;
! delayed() ->
// Try stack-lock acquisition.
// Beware: the 1st instruction is in a delay slot
! mov (Rbox, Rscratch);
! or3 (Rmark, markOopDesc::unlocked_value, Rmark);
! assert (mark_addr.disp() == 0, "cas must take a zero displacement");
! casn (mark_addr.base(), Rmark, Rscratch) ;
! cmp (Rmark, Rscratch);
! brx (Assembler::equal, false, Assembler::pt, done);
delayed()->sub(Rscratch, SP, Rscratch);
// Stack-lock attempt failed - check for recursive stack-lock.
// See the comments below about how we might remove this case.
#ifdef _LP64
! sub (Rscratch, STACK_BIAS, Rscratch);
#endif
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
! andcc (Rscratch, 0xfffff003, Rscratch);
! br (Assembler::always, false, Assembler::pt, done) ;
! delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! bind (IsInflated) ;
if (EmitSync & 64) {
// If m->owner != null goto IsLocked
// Pessimistic form: Test-and-CAS vs CAS
// The optimistic form avoids RTS->RTO cache line upgrades.
! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! andcc (Rscratch, Rscratch, G0) ;
! brx (Assembler::notZero, false, Assembler::pn, done) ;
! delayed()->nop() ;
// m->owner == null : it's unlocked.
}
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
! mov (G2_thread, Rscratch) ;
! casn (Rmark, G0, Rscratch) ;
! cmp (Rscratch, G0) ;
// Intentional fall-through into done
} else {
// Aggressively avoid the Store-before-CAS penalty
// Defer the store into box->dhw until after the CAS
Label IsInflated, Recursive ;
// Anticipate CAS -- Avoid RTS->RTO upgrade
! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
! ld_ptr (mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
}
! andcc (Rmark, 2, G0) ;
! brx (Assembler::notZero, false, Assembler::pn, IsInflated) ;
delayed()-> // Beware - dangling delay-slot
// Try stack-lock acquisition.
// Transiently install BUSY (0) encoding in the mark word.
// if the CAS of 0 into the mark was successful then we execute:
// ST box->dhw = mark -- save fetched mark in on-stack basiclock box
// ST obj->mark = box -- overwrite transient 0 value
// This presumes TSO, of course.
! mov (0, Rscratch) ;
! or3 (Rmark, markOopDesc::unlocked_value, Rmark);
! assert (mark_addr.disp() == 0, "cas must take a zero displacement");
! casn (mark_addr.base(), Rmark, Rscratch) ;
! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
! cmp (Rscratch, Rmark) ;
! brx (Assembler::notZero, false, Assembler::pn, Recursive) ;
! delayed() ->
! st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
if (counters != NULL) {
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
}
! br (Assembler::always, false, Assembler::pt, done);
! delayed() ->
! st_ptr (Rbox, mark_addr) ;
! bind (Recursive) ;
// Stack-lock attempt failed - check for recursive stack-lock.
// Tests show that we can remove the recursive case with no impact
// on refworkload 0.83. If we need to reduce the size of the code
// emitted by compiler_lock_object() the recursive case is a perfect
// candidate.
--- 3641,3736 ----
}
// Store mark into displaced mark field in the on-stack basic-lock "box"
// Critically, this must happen before the CAS
// Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
! st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
! andcc(Rmark, 2, G0);
! brx(Assembler::notZero, false, Assembler::pn, IsInflated);
! delayed()->
// Try stack-lock acquisition.
// Beware: the 1st instruction is in a delay slot
! mov(Rbox, Rscratch);
! or3(Rmark, markOopDesc::unlocked_value, Rmark);
! assert(mark_addr.disp() == 0, "cas must take a zero displacement");
! casn(mark_addr.base(), Rmark, Rscratch);
! cmp(Rmark, Rscratch);
! brx(Assembler::equal, false, Assembler::pt, done);
delayed()->sub(Rscratch, SP, Rscratch);
// Stack-lock attempt failed - check for recursive stack-lock.
// See the comments below about how we might remove this case.
#ifdef _LP64
! sub(Rscratch, STACK_BIAS, Rscratch);
#endif
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
! andcc(Rscratch, 0xfffff003, Rscratch);
! br(Assembler::always, false, Assembler::pt, done);
! delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
! bind(IsInflated);
if (EmitSync & 64) {
// If m->owner != null goto IsLocked
// Pessimistic form: Test-and-CAS vs CAS
// The optimistic form avoids RTS->RTO cache line upgrades.
! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! andcc(Rscratch, Rscratch, G0);
! brx(Assembler::notZero, false, Assembler::pn, done);
! delayed()->nop();
// m->owner == null : it's unlocked.
}
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
! mov(G2_thread, Rscratch);
! casn(Rmark, G0, Rscratch);
! cmp(Rscratch, G0);
// Intentional fall-through into done
} else {
// Aggressively avoid the Store-before-CAS penalty
// Defer the store into box->dhw until after the CAS
Label IsInflated, Recursive ;
// Anticipate CAS -- Avoid RTS->RTO upgrade
! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
! ld_ptr(mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
}
! andcc(Rmark, 2, G0);
! brx(Assembler::notZero, false, Assembler::pn, IsInflated);
delayed()-> // Beware - dangling delay-slot
// Try stack-lock acquisition.
// Transiently install BUSY (0) encoding in the mark word.
// if the CAS of 0 into the mark was successful then we execute:
// ST box->dhw = mark -- save fetched mark in on-stack basiclock box
// ST obj->mark = box -- overwrite transient 0 value
// This presumes TSO, of course.
! mov(0, Rscratch);
! or3(Rmark, markOopDesc::unlocked_value, Rmark);
! assert(mark_addr.disp() == 0, "cas must take a zero displacement");
! casn(mark_addr.base(), Rmark, Rscratch);
! // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
! cmp(Rscratch, Rmark);
! brx(Assembler::notZero, false, Assembler::pn, Recursive);
! delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
if (counters != NULL) {
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
}
! ba(done);
! delayed()->st_ptr(Rbox, mark_addr);
! bind(Recursive);
// Stack-lock attempt failed - check for recursive stack-lock.
// Tests show that we can remove the recursive case with no impact
// on refworkload 0.83. If we need to reduce the size of the code
// emitted by compiler_lock_object() the recursive case is a perfect
// candidate.
*** 3705,3757 ****
// the fast-path stack-lock code from the interpreter and always passed
// control to the "slow" operators in synchronizer.cpp.
// RScratch contains the fetched obj->mark value from the failed CASN.
#ifdef _LP64
! sub (Rscratch, STACK_BIAS, Rscratch);
#endif
sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
! andcc (Rscratch, 0xfffff003, Rscratch);
if (counters != NULL) {
// Accounting needs the Rscratch register
! st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
! br (Assembler::always, false, Assembler::pt, done) ;
! delayed()->nop() ;
} else {
! br (Assembler::always, false, Assembler::pt, done) ;
! delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
}
! bind (IsInflated) ;
if (EmitSync & 64) {
// If m->owner != null goto IsLocked
// Test-and-CAS vs CAS
// Pessimistic form avoids futile (doomed) CAS attempts
// The optimistic form avoids RTS->RTO cache line upgrades.
! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! andcc (Rscratch, Rscratch, G0) ;
! brx (Assembler::notZero, false, Assembler::pn, done) ;
! delayed()->nop() ;
// m->owner == null : it's unlocked.
}
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
! mov (G2_thread, Rscratch) ;
! casn (Rmark, G0, Rscratch) ;
! cmp (Rscratch, G0) ;
// ST box->displaced_header = NonZero.
// Any non-zero value suffices:
// unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
! st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
// Intentional fall-through into done
}
! bind (done) ;
}
void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
Register Rbox, Register Rscratch,
bool try_bias) {
--- 3743,3794 ----
// the fast-path stack-lock code from the interpreter and always passed
// control to the "slow" operators in synchronizer.cpp.
// RScratch contains the fetched obj->mark value from the failed CASN.
#ifdef _LP64
! sub(Rscratch, STACK_BIAS, Rscratch);
#endif
sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
! andcc(Rscratch, 0xfffff003, Rscratch);
if (counters != NULL) {
// Accounting needs the Rscratch register
! st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
! ba_short(done);
} else {
! ba(done);
! delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
}
! bind (IsInflated);
if (EmitSync & 64) {
// If m->owner != null goto IsLocked
// Test-and-CAS vs CAS
// Pessimistic form avoids futile (doomed) CAS attempts
// The optimistic form avoids RTS->RTO cache line upgrades.
! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! andcc(Rscratch, Rscratch, G0);
! brx(Assembler::notZero, false, Assembler::pn, done);
! delayed()->nop();
// m->owner == null : it's unlocked.
}
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
! mov(G2_thread, Rscratch);
! casn(Rmark, G0, Rscratch);
! cmp(Rscratch, G0);
// ST box->displaced_header = NonZero.
// Any non-zero value suffices:
// unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
! st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
// Intentional fall-through into done
}
! bind (done);
}
void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
Register Rbox, Register Rscratch,
bool try_bias) {
*** 3758,3768 ****
Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
Label done ;
if (EmitSync & 4) {
! cmp (SP, G0) ;
return ;
}
if (EmitSync & 8) {
if (try_bias) {
--- 3795,3805 ----
Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
Label done ;
if (EmitSync & 4) {
! cmp(SP, G0);
return ;
}
if (EmitSync & 8) {
if (try_bias) {
*** 3769,3788 ****
biased_locking_exit(mark_addr, Rscratch, done);
}
// Test first if it is a fast recursive unlock
ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
! br_null(Rmark, false, Assembler::pt, done);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
casx_under_lock(mark_addr.base(), Rbox, Rmark,
(address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
! ba(done, false);
delayed()->cmp(Rbox, Rmark);
! bind (done) ;
return ;
}
// Beware ... If the aggregate size of the code emitted by CLO and CUO
// is too large, performance rolls abruptly off a cliff.
--- 3806,3825 ----
biased_locking_exit(mark_addr, Rscratch, done);
}
// Test first if it is a fast recursive unlock
ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
! br_null_short(Rmark, Assembler::pt, done);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
casx_under_lock(mark_addr.base(), Rbox, Rmark,
(address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
! ba(done);
delayed()->cmp(Rbox, Rmark);
! bind(done);
return ;
}
// Beware ... If the aggregate size of the code emitted by CLO and CUO
// is too large, performance rolls abruptly off a cliff.
*** 3793,3810 ****
if (try_bias) {
// TODO: eliminate redundant LDs of obj->mark
biased_locking_exit(mark_addr, Rscratch, done);
}
! ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
! ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
! andcc (Rscratch, Rscratch, G0);
! brx (Assembler::zero, false, Assembler::pn, done);
! delayed()-> nop() ; // consider: relocate fetch of mark, above, into this DS
! andcc (Rmark, 2, G0) ;
! brx (Assembler::zero, false, Assembler::pt, LStacked) ;
! delayed()-> nop() ;
// It's inflated
// Conceptually we need a #loadstore|#storestore "release" MEMBAR before
// the ST of 0 into _owner which releases the lock. This prevents loads
// and stores within the critical section from reordering (floating)
--- 3830,3847 ----
if (try_bias) {
// TODO: eliminate redundant LDs of obj->mark
biased_locking_exit(mark_addr, Rscratch, done);
}
! ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
! ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
! andcc(Rscratch, Rscratch, G0);
! brx(Assembler::zero, false, Assembler::pn, done);
! delayed()->nop(); // consider: relocate fetch of mark, above, into this DS
! andcc(Rmark, 2, G0);
! brx(Assembler::zero, false, Assembler::pt, LStacked);
! delayed()->nop();
// It's inflated
// Conceptually we need a #loadstore|#storestore "release" MEMBAR before
// the ST of 0 into _owner which releases the lock. This prevents loads
// and stores within the critical section from reordering (floating)
*** 3811,3861 ****
// past the store that releases the lock. But TSO is a strong memory model
// and that particular flavor of barrier is a noop, so we can safely elide it.
// Note that we use 1-0 locking by default for the inflated case. We
// close the resultant (and rare) race by having contented threads in
// monitorenter periodically poll _owner.
! ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
! xor3 (Rscratch, G2_thread, Rscratch) ;
! orcc (Rbox, Rscratch, Rbox) ;
! brx (Assembler::notZero, false, Assembler::pn, done) ;
delayed()->
! ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
! ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
! orcc (Rbox, Rscratch, G0) ;
if (EmitSync & 65536) {
Label LSucc ;
! brx (Assembler::notZero, false, Assembler::pn, LSucc) ;
! delayed()->nop() ;
! ba (done, false) ;
! delayed()->
! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
! bind (LSucc) ;
! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
! if (os::is_MP()) { membar (StoreLoad) ; }
! ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
! andcc (Rscratch, Rscratch, G0) ;
! brx (Assembler::notZero, false, Assembler::pt, done) ;
! delayed()-> andcc (G0, G0, G0) ;
! add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
! mov (G2_thread, Rscratch) ;
! casn (Rmark, G0, Rscratch) ;
// invert icc.zf and goto done
! br_notnull(Rscratch, false, Assembler::pt, done, false) ;
! delayed() -> cmp (G0, G0) ;
! ba (done, false);
! delayed() -> cmp (G0, 1) ;
} else {
! brx (Assembler::notZero, false, Assembler::pn, done) ;
! delayed()->nop() ;
! ba (done, false) ;
! delayed()->
! st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
}
! bind (LStacked) ;
// Consider: we could replace the expensive CAS in the exit
// path with a simple ST of the displaced mark value fetched from
// the on-stack basiclock box. That admits a race where a thread T2
// in the slow lock path -- inflating with monitor M -- could race a
// thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
--- 3848,3896 ----
// past the store that releases the lock. But TSO is a strong memory model
// and that particular flavor of barrier is a noop, so we can safely elide it.
// Note that we use 1-0 locking by default for the inflated case. We
// close the resultant (and rare) race by having contented threads in
// monitorenter periodically poll _owner.
! ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
! ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
! xor3(Rscratch, G2_thread, Rscratch);
! orcc(Rbox, Rscratch, Rbox);
! brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->
! ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
! ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
! orcc(Rbox, Rscratch, G0);
if (EmitSync & 65536) {
Label LSucc ;
! brx(Assembler::notZero, false, Assembler::pn, LSucc);
! delayed()->nop();
! ba(done);
! delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
! bind(LSucc);
! st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
! if (os::is_MP()) { membar (StoreLoad); }
! ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
! andcc(Rscratch, Rscratch, G0);
! brx(Assembler::notZero, false, Assembler::pt, done);
! delayed()->andcc(G0, G0, G0);
! add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
! mov(G2_thread, Rscratch);
! casn(Rmark, G0, Rscratch);
// invert icc.zf and goto done
! br_notnull(Rscratch, false, Assembler::pt, done);
! delayed()->cmp(G0, G0);
! ba(done);
! delayed()->cmp(G0, 1);
} else {
! brx(Assembler::notZero, false, Assembler::pn, done);
! delayed()->nop();
! ba(done);
! delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
}
! bind (LStacked);
// Consider: we could replace the expensive CAS in the exit
// path with a simple ST of the displaced mark value fetched from
// the on-stack basiclock box. That admits a race where a thread T2
// in the slow lock path -- inflating with monitor M -- could race a
// thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
*** 3880,3894 ****
// lost-update "stomp" WAW race but detects and recovers as needed.
//
// A prototype implementation showed excellent results, although
// the scavenger and timeout code was rather involved.
! casn (mark_addr.base(), Rbox, Rscratch) ;
! cmp (Rbox, Rscratch);
// Intentional fall through into done ...
! bind (done) ;
}
void MacroAssembler::print_CPU_state() {
--- 3915,3929 ----
// lost-update "stomp" WAW race but detects and recovers as needed.
//
// A prototype implementation showed excellent results, although
// the scavenger and timeout code was rather involved.
! casn(mark_addr.base(), Rbox, Rscratch);
! cmp(Rbox, Rscratch);
// Intentional fall through into done ...
! bind(done);
}
void MacroAssembler::print_CPU_state() {
*** 3940,3964 ****
save_frame(0);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
or3(t1, t2, t3);
! cmp_and_br(t1, t2, Assembler::greaterEqual, false, Assembler::pn, next);
stop("assert(top >= start)");
should_not_reach_here();
bind(next);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
or3(t3, t2, t3);
! cmp_and_br(t1, t2, Assembler::lessEqual, false, Assembler::pn, next2);
stop("assert(top <= end)");
should_not_reach_here();
bind(next2);
and3(t3, MinObjAlignmentInBytesMask, t3);
! cmp_and_br(t3, 0, Assembler::lessEqual, false, Assembler::pn, ok);
stop("assert(aligned)");
should_not_reach_here();
bind(ok);
restore();
--- 3975,3999 ----
save_frame(0);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
or3(t1, t2, t3);
! cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
stop("assert(top >= start)");
should_not_reach_here();
bind(next);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
or3(t3, t2, t3);
! cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
stop("assert(top <= end)");
should_not_reach_here();
bind(next2);
and3(t3, MinObjAlignmentInBytesMask, t3);
! cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
stop("assert(aligned)");
should_not_reach_here();
bind(ok);
restore();
*** 3980,3990 ****
assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
! ba(slow_case);
} else {
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
--- 4015,4025 ----
assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
! ba_short(slow_case);
} else {
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
*** 4114,4124 ****
assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
Label do_refill, discard_tlab;
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
! ba(slow_case);
}
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
--- 4149,4159 ----
assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
Label do_refill, discard_tlab;
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
! ba_short(slow_case);
}
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
*** 4139,4149 ****
// increment number of slow_allocations
ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
add(t2, 1, t2);
stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
}
! ba(try_eden);
bind(discard_tlab);
if (TLABStats) {
// increment number of refills
ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
--- 4174,4184 ----
// increment number of slow_allocations
ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
add(t2, 1, t2);
stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
}
! ba_short(try_eden);
bind(discard_tlab);
if (TLABStats) {
// increment number of refills
ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
*** 4155,4165 ****
stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
}
// if tlab is currently allocated (top or end != null) then
// fill [top, end + alignment_reserve) with array object
! br_null(top, false, Assembler::pn, do_refill);
set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
// set klass to intArrayKlass
sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
--- 4190,4200 ----
stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
}
// if tlab is currently allocated (top or end != null) then
// fill [top, end + alignment_reserve) with array object
! br_null_short(top, Assembler::pn, do_refill);
set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
// set klass to intArrayKlass
sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
*** 4190,4200 ****
// check that tlab_size (t1) is still valid
{
Label ok;
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
sll_ptr(t2, LogHeapWordSize, t2);
! cmp_and_br(t1, t2, Assembler::equal, false, Assembler::pn, ok);
stop("assert(t1 == tlab_size)");
should_not_reach_here();
bind(ok);
}
--- 4225,4235 ----
// check that tlab_size (t1) is still valid
{
Label ok;
ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
sll_ptr(t2, LogHeapWordSize, t2);
! cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
stop("assert(t1 == tlab_size)");
should_not_reach_here();
bind(ok);
}
*** 4201,4211 ****
#endif // ASSERT
add(top, t1, top); // t1 is tlab_size
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
verify_tlab();
! ba(retry);
}
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
Register t1, Register t2) {
// Bump total bytes allocated by this thread
--- 4236,4246 ----
#endif // ASSERT
add(top, t1, top); // t1 is tlab_size
sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
verify_tlab();
! ba_short(retry);
}
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
Register t1, Register t2) {
// Bump total bytes allocated by this thread
*** 4352,4362 ****
"check sizes in assembly below");
__ bind(restart);
__ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
! __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill, false);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
__ sub(L0, oopSize, L0);
__ st_ptr(pre_val, L1, L0); // [_buf + index] := I0
--- 4387,4397 ----
"check sizes in assembly below");
__ bind(restart);
__ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
! __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
__ sub(L0, oopSize, L0);
__ st_ptr(pre_val, L1, L0); // [_buf + index] := I0
*** 4467,4476 ****
--- 4502,4512 ----
tmp);
}
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+ delayed()->nop();
// Do we need to load the previous value?
if (obj != noreg) {
// Load the previous value...
if (index == noreg) {
*** 4490,4499 ****
--- 4526,4536 ----
assert(pre_val != noreg, "must have a real register");
// Is the previous value null?
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
+ delayed()->nop();
// OK, it's not filtered, so we'll need to call enqueue. In the normal
// case, pre_val will be a scratch G-reg, but there are some cases in
// which it's an O-reg. In the first case, do a normal call. In the
// latter, do a save here and call the frameless version.
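The delayed()->nop() lines added after these br_on_reg_cond() calls make the delay slot explicit: a non-annulled SPARC branch always executes the following instruction, and with the helper's trailing emit-a-nop parameter gone (compare the satb_q_index branch above, which hoists a load into its slot instead), each caller must either put useful work in the slot or fill it with a nop:

   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
   delayed()->nop();   // nothing useful to hoist into the delay slot here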