3002 // TODO: eliminate redundant LDs of obj->mark 3003 biased_locking_exit(mark_addr, Rscratch, done); 3004 } 3005 3006 ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); 3007 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); 3008 andcc(Rscratch, Rscratch, G0); 3009 brx(Assembler::zero, false, Assembler::pn, done); 3010 delayed()->nop(); // consider: relocate fetch of mark, above, into this DS 3011 andcc(Rmark, 2, G0); 3012 brx(Assembler::zero, false, Assembler::pt, LStacked); 3013 delayed()->nop(); 3014 3015 // It's inflated 3016 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before 3017 // the ST of 0 into _owner which releases the lock. This prevents loads 3018 // and stores within the critical section from reordering (floating) 3019 // past the store that releases the lock. But TSO is a strong memory model 3020 // and that particular flavor of barrier is a noop, so we can safely elide it. 3021 // Note that we use 1-0 locking by default for the inflated case. We 3022 // close the resultant (and rare) race by having contended threads in 3023 // monitorenter periodically poll _owner. 
3024 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch); 3025 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), Rbox); 3026 xor3(Rscratch, G2_thread, Rscratch); 3027 orcc(Rbox, Rscratch, Rbox); 3028 brx(Assembler::notZero, false, Assembler::pn, done); 3029 delayed()-> 3030 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList), Rscratch); 3031 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq), Rbox); 3032 orcc(Rbox, Rscratch, G0); 3033 if (EmitSync & 65536) { 3034 Label LSucc ; 3035 brx(Assembler::notZero, false, Assembler::pn, LSucc); 3036 delayed()->nop(); 3037 ba(done); 3038 delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); 3039 3040 bind(LSucc); 3041 st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); 3042 if (os::is_MP()) { membar (StoreLoad); } 3043 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ), Rscratch); 3044 andcc(Rscratch, Rscratch, G0); 3045 brx(Assembler::notZero, false, Assembler::pt, done); 3046 delayed()->andcc(G0, G0, G0); 3047 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); 3048 mov(G2_thread, Rscratch); 3049 cas_ptr(Rmark, G0, Rscratch); 3050 // invert icc.zf and goto done 3051 br_notnull(Rscratch, false, Assembler::pt, done); 3052 delayed()->cmp(G0, G0); 3053 ba(done); 3054 delayed()->cmp(G0, 1); 3055 } else { 3056 brx(Assembler::notZero, false, Assembler::pn, done); 3057 delayed()->nop(); 3058 ba(done); 3059 delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); 3060 } 3061 3062 bind (LStacked); 3063 // Consider: we could replace the expensive CAS in the exit 3064 // path with a simple ST of the displaced mark value fetched from 3065 // the on-stack basiclock box. That admits a race where a thread T2 3066 // in the slow lock path -- inflating with monitor M -- could race a 3067 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. 
3068 // More precisely T1 in the stack-lock unlock path could "stomp" the 3069 // inflated mark value M installed by T2, resulting in an orphan 3070 // object monitor M and T2 becoming stranded. We can remedy that situation 3071 // by having T2 periodically poll the object's mark word using timed wait 3072 // operations. If T2 discovers that a stomp has occurred it vacates 3073 // the monitor M and wakes any other threads stranded on the now-orphan M. 3074 // In addition the monitor scavenger, which performs deflation, 3075 // would also need to check for orphan monitors and stranded threads. 3076 // 3077 // Finally, inflation is also used when T2 needs to assign a hashCode 3078 // to O and O is stack-locked by T1. The "stomp" race could cause 3079 // an assigned hashCode value to be lost. We can avoid that condition 3080 // and provide the necessary hashCode stability invariants by ensuring | 3002 // TODO: eliminate redundant LDs of obj->mark 3003 biased_locking_exit(mark_addr, Rscratch, done); 3004 } 3005 3006 ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); 3007 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); 3008 andcc(Rscratch, Rscratch, G0); 3009 brx(Assembler::zero, false, Assembler::pn, done); 3010 delayed()->nop(); // consider: relocate fetch of mark, above, into this DS 3011 andcc(Rmark, 2, G0); 3012 brx(Assembler::zero, false, Assembler::pt, LStacked); 3013 delayed()->nop(); 3014 3015 // It's inflated 3016 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before 3017 // the ST of 0 into _owner which releases the lock. This prevents loads 3018 // and stores within the critical section from reordering (floating) 3019 // past the store that releases the lock. But TSO is a strong memory model 3020 // and that particular flavor of barrier is a noop, so we can safely elide it. 3021 // Note that we use 1-0 locking by default for the inflated case. 
We 3022 // close the resultant (and rare) race by having contended threads in 3023 // monitorenter periodically poll _owner. 3024 3025 if (EmitSync & 1024) { 3026 // Emit code to check that _owner == Self 3027 // We could fold the _owner test into subsequent code more efficiently 3028 // than using a stand-alone check, but since _owner checking is off by 3029 // default we don't bother. 3030 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch); 3031 orcc(Rscratch, G0, G0); 3032 brx(Assembler::notZero, false, Assembler::pn, done); 3033 delayed()->nop(); 3034 } 3035 3036 if (EmitSync & 512) { 3037 // classic lock release code absent 1-0 locking 3038 // m->Owner = null; 3039 // membar #storeload 3040 // if (m->cxq|m->EntryList) == null goto Success 3041 // if (m->succ != null) goto Success 3042 // if CAS (&m->Owner,0,Self) != 0 goto Success 3043 // goto SlowPath 3044 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); 3045 orcc(Rbox, G0, G0); 3046 brx(Assembler::notZero, false, Assembler::pn, done); 3047 delayed()->nop(); 3048 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3049 if (os::is_MP()) { membar(StoreLoad); } 3050 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); 3051 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); 3052 orcc(Rbox, Rscratch, G0); 3053 brx(Assembler::zero, false, Assembler::pt, done); 3054 delayed()-> 3055 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); 3056 andcc(Rscratch, Rscratch, G0); 3057 brx(Assembler::notZero, false, Assembler::pt, done); 3058 delayed()->andcc(G0, G0, G0); 3059 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); 3060 mov(G2_thread, Rscratch); 3061 cas_ptr(Rmark, G0, Rscratch); 3062 cmp(Rscratch, G0); 3063 // invert icc.zf and goto done 3064 brx(Assembler::notZero, false, Assembler::pt, done); 3065 delayed()->cmp(G0, G0); 3066 br(Assembler::always, false, Assembler::pt, done); 3067 
delayed()->cmp(G0, 1); 3068 } else { 3069 // 1-0 form : avoids CAS and MEMBAR in the common case 3070 // Do not bother to ratify that m->Owner == Self. 3071 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); 3072 orcc(Rbox, G0, G0); 3073 brx(Assembler::notZero, false, Assembler::pn, done); 3074 delayed()-> 3075 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); 3076 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); 3077 orcc(Rbox, Rscratch, G0); 3078 if (EmitSync & 16384) { 3079 // As an optional optimization, if (EntryList|cxq) != null and _succ is null then 3080 // we should transfer control directly to the slow-path. 3081 // This test makes the reacquire operation below very infrequent. 3082 // The logic is equivalent to : 3083 // if (cxq|EntryList) == null : Owner=null; goto Success 3084 // if succ == null : goto SlowPath 3085 // Owner=null; membar #storeload 3086 // if succ != null : goto Success 3087 // if CAS(&Owner,null,Self) != null goto Success 3088 // goto SlowPath 3089 brx(Assembler::zero, true, Assembler::pt, done); 3090 delayed()-> 3091 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3092 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); 3093 andcc(Rscratch, Rscratch, G0) ; 3094 brx(Assembler::zero, false, Assembler::pt, done); 3095 delayed()->orcc(G0, 1, G0); 3096 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3097 } else { 3098 brx(Assembler::zero, false, Assembler::pt, done); 3099 delayed()-> 3100 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 3101 } 3102 if (os::is_MP()) { membar(StoreLoad); } 3103 // Check that _succ is (or remains) non-zero 3104 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); 3105 andcc(Rscratch, Rscratch, G0); 3106 brx(Assembler::notZero, false, Assembler::pt, done); 3107 delayed()->andcc(G0, G0, G0); 3108 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), 
Rmark); 3109 mov(G2_thread, Rscratch); 3110 cas_ptr(Rmark, G0, Rscratch); 3111 cmp(Rscratch, G0); 3112 // invert icc.zf and goto done 3113 // A slightly better v8+/v9 idiom would be the following: 3114 // movrnz Rscratch,1,Rscratch 3115 // ba done 3116 // xorcc Rscratch,1,G0 3117 // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register 3118 brx(Assembler::notZero, false, Assembler::pt, done); 3119 delayed()->cmp(G0, G0); 3120 br(Assembler::always, false, Assembler::pt, done); 3121 delayed()->cmp(G0, 1); 3122 } 3123 3124 bind (LStacked); 3125 // Consider: we could replace the expensive CAS in the exit 3126 // path with a simple ST of the displaced mark value fetched from 3127 // the on-stack basiclock box. That admits a race where a thread T2 3128 // in the slow lock path -- inflating with monitor M -- could race a 3129 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. 3130 // More precisely T1 in the stack-lock unlock path could "stomp" the 3131 // inflated mark value M installed by T2, resulting in an orphan 3132 // object monitor M and T2 becoming stranded. We can remedy that situation 3133 // by having T2 periodically poll the object's mark word using timed wait 3134 // operations. If T2 discovers that a stomp has occurred it vacates 3135 // the monitor M and wakes any other threads stranded on the now-orphan M. 3136 // In addition the monitor scavenger, which performs deflation, 3137 // would also need to check for orphan monitors and stranded threads. 3138 // 3139 // Finally, inflation is also used when T2 needs to assign a hashCode 3140 // to O and O is stack-locked by T1. The "stomp" race could cause 3141 // an assigned hashCode value to be lost. We can avoid that condition 3142 // and provide the necessary hashCode stability invariants by ensuring |