src/cpu/x86/vm/x86_32.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8033805 Cdiff src/cpu/x86/vm/x86_32.ad

src/cpu/x86/vm/x86_32.ad

Print this page

        

*** 2916,3461 **** emit_opcode(cbuf,0x83); // SBB hi,0 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); emit_d8 (cbuf,0 ); %} - - // Because the transitions from emitted code to the runtime - // monitorenter/exit helper stubs are so slow it's critical that - // we inline both the stack-locking fast-path and the inflated fast path. - // - // See also: cmpFastLock and cmpFastUnlock. - // - // What follows is a specialized inline transliteration of the code - // in slow_enter() and slow_exit(). If we're concerned about I$ bloat - // another option would be to emit TrySlowEnter and TrySlowExit methods - // at startup-time. These methods would accept arguments as - // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure - // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply - // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. - // In practice, however, the # of lock sites is bounded and is usually small. - // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer - // if the processor uses simple bimodal branch predictors keyed by EIP - // Since the helper routines would be called from multiple synchronization - // sites. - // - // An even better approach would be write "MonitorEnter()" and "MonitorExit()" - // in java - using j.u.c and unsafe - and just bind the lock and unlock sites - // to those specialized methods. That'd give us a mostly platform-independent - // implementation that the JITs could optimize and inline at their pleasure. - // Done correctly, the only time we'd need to cross to native could would be - // to park() or unpark() threads. We'd also need a few more unsafe operators - // to (a) prevent compiler-JIT reordering of non-volatile accesses, and - // (b) explicit barriers or fence operations. - // - // TODO: - // - // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). - // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. - // Given TLAB allocation, Self is usually manifested in a register, so passing it into - // the lock operators would typically be faster than reifying Self. - // - // * Ideally I'd define the primitives as: - // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. - // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED - // Unfortunately ADLC bugs prevent us from expressing the ideal form. - // Instead, we're stuck with a rather awkward and brittle register assignments below. - // Furthermore the register assignments are overconstrained, possibly resulting in - // sub-optimal code near the synchronization site. - // - // * Eliminate the sp-proximity tests and just use "== Self" tests instead. - // Alternately, use a better sp-proximity test. - // - // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. - // Either one is sufficient to uniquely identify a thread. - // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. - // - // * Intrinsify notify() and notifyAll() for the common cases where the - // object is locked by the calling thread but the waitlist is empty. - // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). - // - // * use jccb and jmpb instead of jcc and jmp to improve code density. - // But beware of excessive branch density on AMD Opterons. - // - // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success - // or failure of the fast-path. If the fast-path fails then we pass - // control to the slow-path, typically in C. In Fast_Lock and - // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 - // will emit a conditional branch immediately after the node. - // So we have branches to branches and lots of ICC.ZF games. - // Instead, it might be better to have C2 pass a "FailureLabel" - // into Fast_Lock and Fast_Unlock. In the case of success, control - // will drop through the node. ICC.ZF is undefined at exit. - // In the case of failure, the node will branch directly to the - // FailureLabel - - - // obj: object to lock - // box: on-stack box address (displaced header location) - KILLED - // rax,: tmp -- KILLED - // scr: tmp -- KILLED - enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - Register scrReg = as_Register($scr$$reg); - - // Ensure the register assignents are disjoint - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (objReg != scrReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg != scrReg, "") ; - guarantee (tmpReg == as_Register(EAX_enc), "") ; - - MacroAssembler masm(&cbuf); - - if (_counters != NULL) { - masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); - } - if (EmitSync & 1) { - // set box->dhw = unused_mark (3) - // Force all sync thru slow-path: slow_enter() and slow_exit() - masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ; - masm.cmpptr (rsp, (int32_t)0) ; - } else - if (EmitSync & 2) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword - masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - masm.jcc(Assembler::equal, DONE_LABEL); - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, (int32_t) 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - masm.bind(DONE_LABEL) ; - } else { - // Possible cases that we'll encounter in fast_lock - // ------------------------------------------------ - // * Inflated - // -- unlocked - // -- Locked - // = by self - // = by other - // * biased - // -- by Self - // -- by other - // * neutral - // * stack-locked - // -- by self - // = sp-proximity test hits - // = sp-proximity test generates false-negative - // -- by other - // - - Label IsInflated, DONE_LABEL, PopDone ; - - // TODO: optimize away redundant LDs of obj->mark and improve the markword triage - // order to reduce the number of conditional branches in the most common cases. - // Beware -- there's a subtle invariant that fetch of the markword - // at [FETCH], below, will never observe a biased encoding (*101b). - // If this invariant is not held we risk exclusion (safety) failure. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] - masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral) - masm.jccb (Assembler::notZero, IsInflated) ; - - // Attempt stack-locking ... - masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jccb (Assembler::equal, DONE_LABEL); - - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jmp (DONE_LABEL) ; - - masm.bind (IsInflated) ; - - // The object is inflated. - // - // TODO-FIXME: eliminate the ugly use of manifest constants: - // Use markOopDesc::monitor_value instead of "2". - // use markOop::unused_mark() instead of "3". - // The tmpReg value is an objectMonitor reference ORed with - // markOopDesc::monitor_value (2). We can either convert tmpReg to an - // objectmonitor pointer by masking off the "2" bit or we can just - // use tmpReg as an objectmonitor pointer but bias the objectmonitor - // field offsets with "-2" to compensate for and annul the low-order tag bit. - // - // I use the latter as it avoids AGI stalls. - // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" - // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". - // - #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) - - // boxReg refers to the on-stack BasicLock in the current frame. - // We'd like to write: - // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. - // This is convenient but results a ST-before-CAS penalty. The following CAS suffers - // additional latency as we have another ST in the store buffer that must drain. - - if (EmitSync & 8192) { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.get_thread (scrReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - } else - if ((EmitSync & 128) == 0) { // avoid ST-before-CAS - masm.movptr(scrReg, boxReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form: consider XORL tmpReg,tmpReg - masm.movptr(tmpReg, NULL_WORD) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - // Test-And-CAS instead of CAS - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Ideally, I'd manifest "Self" with get_thread and then attempt - // to CAS the register containing Self into m->Owner. - // But we don't have enough registers, so instead we can either try to CAS - // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds - // we later store "Self" into m->Owner. Transiently storing a stack address - // (rsp or the address of the box) into m->owner is harmless. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3 - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.get_thread (scrReg) ; // beware: clobbers ICCs - masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; - masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } else { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.movptr(boxReg, tmpReg) ; - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form - masm.xorptr (tmpReg, tmpReg) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Use either "Self" (in scr) or rsp as thread identity in _owner. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - masm.get_thread (scrReg) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. - masm.bind(DONE_LABEL); - - // Avoid branch-to-branch on AMD processors - // This appears to be superstition. - if (EmitSync & 32) masm.nop() ; - - - // At DONE_LABEL the icc ZFlag is set as follows ... - // Fast_Unlock uses the same protocol. - // ZFlag == 1 -> Success - // ZFlag == 0 -> Failure - force control through the slow-path - } - %} - - // obj: object to unlock - // box: box address (displaced header location), killed. Must be EAX. - // rbx,: killed tmp; cannot be obj nor box. - // - // Some commentary on balanced locking: - // - // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. - // Methods that don't have provably balanced locking are forced to run in the - // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. - // The interpreter provides two properties: - // I1: At return-time the interpreter automatically and quietly unlocks any - // objects acquired the current activation (frame). Recall that the - // interpreter maintains an on-stack list of locks currently held by - // a frame. - // I2: If a method attempts to unlock an object that is not held by the - // the frame the interpreter throws IMSX. - // - // Lets say A(), which has provably balanced locking, acquires O and then calls B(). - // B() doesn't have provably balanced locking so it runs in the interpreter. - // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O - // is still locked by A(). - // - // The only other source of unbalanced locking would be JNI. The "Java Native Interface: - // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter - // should not be unlocked by "normal" java-level locking and vice-versa. The specification - // doesn't specify what will occur if a program engages in such mixed-mode locking, however. - - enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg == as_Register(EAX_enc), "") ; - MacroAssembler masm(&cbuf); - - if (EmitSync & 4) { - // Disable - inhibit all inlining. Force control through the slow-path - masm.cmpptr (rsp, 0) ; - } else - if (EmitSync & 8) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - // classic stack-locking code ... - masm.movptr(tmpReg, Address(boxReg, 0)) ; - masm.testptr(tmpReg, tmpReg) ; - masm.jcc (Assembler::zero, DONE_LABEL) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - masm.bind(DONE_LABEL); - } else { - Label DONE_LABEL, Stacked, CheckSucc, Inflated ; - - // Critically, the biased locking test must have precedence over - // and appear before the (box->dhw == 0) recursive stack-lock test. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - - masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header - masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword - masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock - - masm.testptr(tmpReg, 0x02) ; // Inflated? - masm.jccb (Assembler::zero, Stacked) ; - - masm.bind (Inflated) ; - // It's inflated. - // Despite our balanced locking property we still check that m->_owner == Self - // as java routines or native JNI code called by this thread might - // have released the lock. - // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching EntryList|cxq. - // - // I'd like to add more cases in fast_lock() and fast_unlock() -- - // such as recursive enter and exit -- but we have to be wary of - // I$ bloat, T$ effects and BP$ effects. - // - // If there's no contention try a 1-0 exit. That is, exit without - // a costly MEMBAR or CAS. See synchronizer.cpp for details on how - // we detect and recover from the race that the 1-0 exit admits. - // - // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier - // before it STs null into _owner, releasing the lock. Updates - // to data protected by the critical section must be visible before - // we drop the lock (and thus before any other thread could acquire - // the lock and observe the fields protected by the lock). - // IA32's memory-model is SPO, so STs are ordered with respect to - // each other and there's no need for an explicit barrier (fence). - // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. - - masm.get_thread (boxReg) ; - if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [ebx + Offset(_owner)-2] - masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { - // Attempt to reduce branch density - AMD's branch predictor. - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } else { - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, CheckSucc) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } - - // The Following code fragment (EmitSync & 65536) improves the performance of - // contended applications and contended synchronization microbenchmarks. - // Unfortunately the emission of the code - even though not executed - causes regressions - // in scimark and jetstream, evidently because of $ effects. Replacing the code - // with an equal number of never-executed NOPs results in the same regression. - // We leave it off by default. - - if ((EmitSync & 65536) != 0) { - Label LSuccess, LGoSlowPath ; - - masm.bind (CheckSucc) ; - - // Optional pre-test ... it's safe to elide this - if ((EmitSync & 16) == 0) { - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::zero, LGoSlowPath) ; - } - - // We have a classic Dekker-style idiom: - // ST m->_owner = 0 ; MEMBAR; LD m->_succ - // There are a number of ways to implement the barrier: - // (1) lock:andl &m->_owner, 0 - // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. - // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 - // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 - // (2) If supported, an explicit MFENCE is appealing. - // In older IA32 processors MFENCE is slower than lock:add or xchg - // particularly if the write-buffer is full as might be the case if - // if stores closely precede the fence or fence-equivalent instruction. - // In more modern implementations MFENCE appears faster, however. - // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack - // The $lines underlying the top-of-stack should be in M-state. - // The locked add instruction is serializing, of course. - // (4) Use xchg, which is serializing - // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works - // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. - // The integer condition codes will tell us if succ was 0. - // Since _succ and _owner should reside in the same $line and - // we just stored into _owner, it's likely that the $line - // remains in M-state for the lock:orl. - // - // We currently use (3), although it's likely that switching to (2) - // is correct for the future. - - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - if (os::is_MP()) { - if (VM_Version::supports_sse2() && 1 == FenceInstruction) { - masm.mfence(); - } else { - masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; - } - } - // Ratify _succ remains non-null - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::notZero, LSuccess) ; - - masm.xorptr(boxReg, boxReg) ; // box is really EAX - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); - masm.jccb (Assembler::notEqual, LSuccess) ; - // Since we're low on registers we installed rsp as a placeholding in _owner. - // Now install Self over rsp. This is safe as we're transitioning from - // non-null to non=null - masm.get_thread (boxReg) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ; - // Intentional fall-through into LGoSlowPath ... - - masm.bind (LGoSlowPath) ; - masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure - masm.jmpb (DONE_LABEL) ; - - masm.bind (LSuccess) ; - masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success - masm.jmpb (DONE_LABEL) ; - } - - masm.bind (Stacked) ; - // It's not inflated and it's not recursively stack-locked and it's not biased. - // It must be stack-locked. - // Try to reset the header to displaced header. - // The "box" value on the stack is stable, so we can reload - // and be assured we observe the same value as above. - masm.movptr(tmpReg, Address(boxReg, 0)) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - // Intention fall-thru into DONE_LABEL - - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. - if ((EmitSync & 65536) == 0) { - masm.bind (CheckSucc) ; - } - masm.bind(DONE_LABEL); - - // Avoid branch to branch on AMD processors - if (EmitSync & 32768) { masm.nop() ; } - } - %} - - enc_class enc_pop_rdx() %{ emit_opcode(cbuf,0x5A); %} enc_class enc_rethrow() %{ --- 2916,2925 ----
*** 13155,13181 **** ins_pipe( pipe_jmp ); %} // inlined locking and unlocking ! ! instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ ! match( Set cr (FastLock object box) ); ! effect( TEMP tmp, TEMP scr, USE_KILL box ); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ! ins_encode( Fast_Lock(object,box,tmp,scr) ); ! ins_pipe( pipe_slow ); %} ! instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ ! match( Set cr (FastUnlock object box) ); ! effect( TEMP tmp, USE_KILL box ); ins_cost(300); format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} ! ins_encode( Fast_Unlock(object,box,tmp) ); ! ins_pipe( pipe_slow ); %} // ============================================================================ --- 12619,12648 ---- ins_pipe( pipe_jmp ); %} // inlined locking and unlocking ! instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ ! match(Set cr (FastLock object box)); ! effect(TEMP tmp, TEMP scr, USE_KILL box); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ! ins_encode %{ ! __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters); ! %} ! ins_pipe(pipe_slow); %} ! instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ ! match(Set cr (FastUnlock object box)); ! effect(TEMP tmp, USE_KILL box); ins_cost(300); format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} ! ins_encode %{ ! __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); ! %} ! ins_pipe(pipe_slow); %} // ============================================================================
src/cpu/x86/vm/x86_32.ad
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File