src/cpu/x86/vm/macroAssembler_x86.cpp

rev 5968 : 8031320: Use Intel RTM instructions for locks
Summary: Use RTM for inflated locks and stack locks.
Reviewed-by: iveresov, twisti, roland, dcubed
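
For context on what the generated fast paths below rely on: the patch emits RTM begin/end/abort sequences through MacroAssembler::xbegin(), xend() and xabort(). The snippet below is a minimal, self-contained sketch of that hardware protocol using the C++ intrinsics from <immintrin.h> (compile with -mrtm). It is an illustration only, not part of the patch; the fallback lock word, retry limit and abort code in it are made-up stand-ins for what fast_lock()/fast_unlock() do with the object markword and RTMRetryCount.

  #include <immintrin.h>   // _xbegin/_xend/_xabort/_xtest, _XBEGIN_STARTED, _XABORT_* bits
  #include <atomic>

  static std::atomic<int> fallback_lock{0};             // hypothetical fallback lock word

  void lock_with_rtm_sketch() {
    for (int attempts = 0; attempts < 5; ++attempts) {  // cf. RTMRetryCount
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        // Inside the transaction: read the lock word. Any conflicting write
        // by another thread aborts the transaction and rolls this back.
        if (fallback_lock.load(std::memory_order_relaxed) == 0) {
          return;          // lock elided; the matching unlock commits with _xend()
        }
        _xabort(0xff);     // lock looked busy: abort explicitly
      }
      // status now holds the abort-reason bits (retry, conflict, explicit, ...)
      if (!(status & (_XABORT_RETRY | _XABORT_CONFLICT))) break;
    }
    // Slow path: take the lock for real.
    int expected = 0;
    while (!fallback_lock.compare_exchange_weak(expected, 1)) { expected = 0; }
  }

  void unlock_with_rtm_sketch() {
    if (_xtest()) { _xend(); return; }                  // commit the elided critical section
    fallback_lock.store(0, std::memory_order_release);  // real unlock
  }

In the patch itself the same structure appears twice: rtm_stack_locking() elides the stack lock by checking the markword inside the transaction, and rtm_inflated_locking() does the same against ObjectMonitor::_owner.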


 284   if (sign_extension) sarl(hi);
 285   else                shrl(hi);
 286 }
 287 
 288 void MacroAssembler::movoop(Register dst, jobject obj) {
 289   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
 290 }
 291 
 292 void MacroAssembler::movoop(Address dst, jobject obj) {
 293   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
 294 }
 295 
 296 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
 297   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 298 }
 299 
 300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
 301   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 302 }
 303 
 304 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
 305   if (src.is_lval()) {
 306     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
 307   } else {
 308     movl(dst, as_Address(src));
 309   }
 310 }
 311 
 312 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
 313   movl(as_Address(dst), src);
 314 }
 315 
 316 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
 317   movl(dst, as_Address(src));
 318 }
 319 
 320 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
 321 void MacroAssembler::movptr(Address dst, intptr_t src) {
 322   movl(dst, src);
 323 }
 324 


 596 
 597   return idivq_offset;
 598 }
 599 
 600 void MacroAssembler::decrementq(Register reg, int value) {
 601   if (value == min_jint) { subq(reg, value); return; }
 602   if (value <  0) { incrementq(reg, -value); return; }
 603   if (value == 0) {                        ; return; }
 604   if (value == 1 && UseIncDec) { decq(reg) ; return; }
 605   /* else */      { subq(reg, value)       ; return; }
 606 }
 607 
 608 void MacroAssembler::decrementq(Address dst, int value) {
 609   if (value == min_jint) { subq(dst, value); return; }
 610   if (value <  0) { incrementq(dst, -value); return; }
 611   if (value == 0) {                        ; return; }
 612   if (value == 1 && UseIncDec) { decq(dst) ; return; }
 613   /* else */      { subq(dst, value)       ; return; }
 614 }
 615 
 616 void MacroAssembler::incrementq(Register reg, int value) {
 617   if (value == min_jint) { addq(reg, value); return; }
 618   if (value <  0) { decrementq(reg, -value); return; }
 619   if (value == 0) {                        ; return; }
 620   if (value == 1 && UseIncDec) { incq(reg) ; return; }
 621   /* else */      { addq(reg, value)       ; return; }
 622 }
 623 
 624 void MacroAssembler::incrementq(Address dst, int value) {
 625   if (value == min_jint) { addq(dst, value); return; }
 626   if (value <  0) { decrementq(dst, -value); return; }
 627   if (value == 0) {                        ; return; }
 628   if (value == 1 && UseIncDec) { incq(dst) ; return; }
 629   /* else */      { addq(dst, value)       ; return; }
 630 }
 631 
 632 // 32bit can do a case table jump in one instruction but we no longer allow the base
 633 // to be installed in the Address class
 634 void MacroAssembler::jump(ArrayAddress entry) {
 635   lea(rscratch1, entry.base());


 664 }
 665 
 666 void MacroAssembler::movoop(Register dst, jobject obj) {
 667   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
 668 }
 669 
 670 void MacroAssembler::movoop(Address dst, jobject obj) {
 671   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
 672   movq(dst, rscratch1);
 673 }
 674 
 675 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
 676   mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
 677 }
 678 
 679 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
 680   mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
 681   movq(dst, rscratch1);
 682 }
 683 
 684 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
 685   if (src.is_lval()) {
 686     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
 687   } else {
 688     if (reachable(src)) {
 689       movq(dst, as_Address(src));
 690     } else {
 691       lea(rscratch1, src);
 692       movq(dst, Address(rscratch1,0));
 693     }
 694   }
 695 }
 696 
 697 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
 698   movq(as_Address(dst), src);
 699 }
 700 
 701 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
 702   movq(dst, as_Address(src));
 703 }
 704 
 705 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
 706 void MacroAssembler::movptr(Address dst, intptr_t src) {
 707   mov64(rscratch1, src);
 708   movq(dst, rscratch1);
 709 }
 710 
 711 // These are mostly for initializing NULL
 712 void MacroAssembler::movptr(Address dst, int32_t src) {


 971     lea(rscratch1, src);
 972     Assembler::andpd(dst, Address(rscratch1, 0));
 973   }
 974 }
 975 
 976 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
 977   // Used in sign-masking with aligned address.
 978   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
 979   if (reachable(src)) {
 980     Assembler::andps(dst, as_Address(src));
 981   } else {
 982     lea(rscratch1, src);
 983     Assembler::andps(dst, Address(rscratch1, 0));
 984   }
 985 }
 986 
 987 void MacroAssembler::andptr(Register dst, int32_t imm32) {
 988   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
 989 }
 990 
 991 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
 992   pushf();
 993   if (reachable(counter_addr)) {
 994     if (os::is_MP())
 995       lock();
 996     incrementl(as_Address(counter_addr));
 997   } else {
 998     lea(rscratch1, counter_addr);
 999     if (os::is_MP())
1000       lock();
1001     incrementl(Address(rscratch1, 0));
1002   }
1003   popf();
1004 }

1005 
1006 // Writes to stack successive pages until offset reached to check for
1007 // stack overflow + shadow pages.  This clobbers tmp.
1008 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1009   movptr(tmp, rsp);
1010   // Bang stack for total size given plus shadow page size.
1011   // Bang one page at a time because large size can bang beyond yellow and
1012   // red zones.
1013   Label loop;
1014   bind(loop);
1015   movl(Address(tmp, (-os::vm_page_size())), size );
1016   subptr(tmp, os::vm_page_size());
1017   subl(size, os::vm_page_size());
1018   jcc(Assembler::greater, loop);
1019 
1020   // Bang down shadow pages too.
1021   // At this point, (tmp-0) is the last address touched, so don't
1022   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1023   // was post-decremented.)  Skip this address by starting at i=1, and
1024   // touch a few more pages below.  N.B.  It is important to touch all


1257 
1258   return null_check_offset;
1259 }
1260 
1261 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1262   assert(UseBiasedLocking, "why call this otherwise?");
1263 
1264   // Check for biased locking unlock case, which is a no-op
1265   // Note: we do not have to check the thread ID for two reasons.
1266   // First, the interpreter checks for IllegalMonitorStateException at
1267   // a higher level. Second, if the bias was revoked while we held the
1268   // lock, the object could not be rebiased toward another thread, so
1269   // the bias bit would be clear.
1270   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1271   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1272   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1273   jcc(Assembler::equal, done);
1274 }
1275 
1276 #ifdef COMPILER2
1277 // Fast_Lock and Fast_Unlock used by C2
1278 
1279 // Because the transitions from emitted code to the runtime
1280 // monitorenter/exit helper stubs are so slow it's critical that
1281 // we inline both the stack-locking fast-path and the inflated fast path.
1282 //
1283 // See also: cmpFastLock and cmpFastUnlock.
1284 //
1285 // What follows is a specialized inline transliteration of the code
1286 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
1287 // another option would be to emit TrySlowEnter and TrySlowExit methods
1288 // at startup-time.  These methods would accept arguments as
1289 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1290 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
1291 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1292 // In practice, however, the # of lock sites is bounded and is usually small.
1293 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
 1294 // if the processor uses simple bimodal branch predictors keyed by EIP,
 1295 // since the helper routines would be called from multiple synchronization
1296 // sites.


1333 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
1334 //    But beware of excessive branch density on AMD Opterons.
1335 //
1336 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
1337 //    or failure of the fast-path.  If the fast-path fails then we pass
1338 //    control to the slow-path, typically in C.  In Fast_Lock and
1339 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
1340 //    will emit a conditional branch immediately after the node.
1341 //    So we have branches to branches and lots of ICC.ZF games.
1342 //    Instead, it might be better to have C2 pass a "FailureLabel"
1343 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
1344 //    will drop through the node.  ICC.ZF is undefined at exit.
1345 //    In the case of failure, the node will branch directly to the
1346 //    FailureLabel
1347 
1348 
1349 // obj: object to lock
1350 // box: on-stack box address (displaced header location) - KILLED
1351 // rax,: tmp -- KILLED
1352 // scr: tmp -- KILLED
1353 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
 1354   // Ensure the register assignments are disjoint
1355   guarantee (objReg != boxReg, "");
1356   guarantee (objReg != tmpReg, "");
1357   guarantee (objReg != scrReg, "");
1358   guarantee (boxReg != tmpReg, "");
1359   guarantee (boxReg != scrReg, "");
1360   guarantee (tmpReg == rax, "");
1361 
1362   if (counters != NULL) {
1363     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
1364   }
1365   if (EmitSync & 1) {
1366       // set box->dhw = unused_mark (3)
1367       // Force all sync thru slow-path: slow_enter() and slow_exit()
1368       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1369       cmpptr (rsp, (int32_t)NULL_WORD);
1370   } else
1371   if (EmitSync & 2) {
1372       Label DONE_LABEL ;
1373       if (UseBiasedLocking) {
1374          // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
1375          biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1376       }
1377 
1378       movptr(tmpReg, Address(objReg, 0));           // fetch markword
1379       orptr (tmpReg, 0x1);
1380       movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
1381       if (os::is_MP()) {
1382         lock();
1383       }


1402     // * neutral
1403     // * stack-locked
1404     //    -- by self
1405     //       = sp-proximity test hits
1406     //       = sp-proximity test generates false-negative
1407     //    -- by other
1408     //
1409 
1410     Label IsInflated, DONE_LABEL;
1411 
1412     // it's stack-locked, biased or neutral
1413     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1414     // order to reduce the number of conditional branches in the most common cases.
1415     // Beware -- there's a subtle invariant that fetch of the markword
1416     // at [FETCH], below, will never observe a biased encoding (*101b).
1417     // If this invariant is not held we risk exclusion (safety) failure.
1418     if (UseBiasedLocking && !UseOptoBiasInlining) {
1419       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
1420     }
1421 
1422     movptr(tmpReg, Address(objReg, 0));          // [FETCH]
1423     testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1424     jccb  (Assembler::notZero, IsInflated);
1425 
1426     // Attempt stack-locking ...
1427     orptr (tmpReg, 0x1);
1428     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
1429     if (os::is_MP()) {
1430       lock();
1431     }
1432     cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
1433     if (counters != NULL) {
1434       cond_inc32(Assembler::equal,
1435                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1436     }
1437     jccb(Assembler::equal, DONE_LABEL);
1438 
1439     // Recursive locking
1440     subptr(tmpReg, rsp);

1441     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1442     movptr(Address(boxReg, 0), tmpReg);
1443     if (counters != NULL) {
1444       cond_inc32(Assembler::equal,
1445                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1446     }
1447     jmpb(DONE_LABEL);
1448 
1449     bind(IsInflated);
1450 #ifndef _LP64
1451     // The object is inflated.
1452     //
1453     // TODO-FIXME: eliminate the ugly use of manifest constants:
1454     //   Use markOopDesc::monitor_value instead of "2".
1455     //   use markOop::unused_mark() instead of "3".
1456     // The tmpReg value is an objectMonitor reference ORed with
1457     // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
1458     // objectmonitor pointer by masking off the "2" bit or we can just
1459     // use tmpReg as an objectmonitor pointer but bias the objectmonitor
1460     // field offsets with "-2" to compensate for and annul the low-order tag bit.
1461     //
1462     // I use the latter as it avoids AGI stalls.
1463     // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
1464     // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
1465     //
1466     #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
1467 
1468     // boxReg refers to the on-stack BasicLock in the current frame.
1469     // We'd like to write:


1559        // If the CAS fails we can either retry or pass control to the slow-path.
1560        // We use the latter tactic.
1561        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1562        // If the CAS was successful ...
1563        //   Self has acquired the lock
1564        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1565        // Intentional fall-through into DONE_LABEL ...
1566     }
1567 #else // _LP64
1568     // It's inflated
1569 
1570     // TODO: someday avoid the ST-before-CAS penalty by
1571     // relocating (deferring) the following ST.
1572     // We should also think about trying a CAS without having
1573     // fetched _owner.  If the CAS is successful we may
1574     // avoid an RTO->RTS upgrade on the $line.
1575 
1576     // Without cast to int32_t a movptr will destroy r10 which is typically obj
1577     movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1578 
1579     mov    (boxReg, tmpReg);
1580     movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1581     testptr(tmpReg, tmpReg);
1582     jccb   (Assembler::notZero, DONE_LABEL);
1583 
1584     // It's inflated and appears unlocked
1585     if (os::is_MP()) {
1586       lock();
1587     }
1588     cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1589     // Intentional fall-through into DONE_LABEL ...

1590 
1591 #endif
1592 
1593     // DONE_LABEL is a hot target - we'd really like to place it at the
1594     // start of cache line by padding with NOPs.
1595     // See the AMD and Intel software optimization manuals for the
1596     // most efficient "long" NOP encodings.
1597     // Unfortunately none of our alignment mechanisms suffice.
1598     bind(DONE_LABEL);
1599 
1600     // At DONE_LABEL the icc ZFlag is set as follows ...
1601     // Fast_Unlock uses the same protocol.
1602     // ZFlag == 1 -> Success
1603     // ZFlag == 0 -> Failure - force control through the slow-path
1604   }
1605 }
1606 
1607 // obj: object to unlock
1608 // box: box address (displaced header location), killed.  Must be EAX.
1609 // tmp: killed, cannot be obj nor box.
1610 //
1611 // Some commentary on balanced locking:
1612 //


1614 // Methods that don't have provably balanced locking are forced to run in the
1615 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
1616 // The interpreter provides two properties:
1617 // I1:  At return-time the interpreter automatically and quietly unlocks any
 1618 //      objects acquired by the current activation (frame).  Recall that the
1619 //      interpreter maintains an on-stack list of locks currently held by
1620 //      a frame.
1621 // I2:  If a method attempts to unlock an object that is not held by the
 1622 //      frame, the interpreter throws IMSX.
1623 //
 1624 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1625 // B() doesn't have provably balanced locking so it runs in the interpreter.
1626 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
1627 // is still locked by A().
1628 //
1629 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
1630 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1631 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
1632 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
1633 
1634 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
1635   guarantee (objReg != boxReg, "");
1636   guarantee (objReg != tmpReg, "");
1637   guarantee (boxReg != tmpReg, "");
1638   guarantee (boxReg == rax, "");
1639 
1640   if (EmitSync & 4) {
1641     // Disable - inhibit all inlining.  Force control through the slow-path
1642     cmpptr (rsp, 0);
1643   } else
1644   if (EmitSync & 8) {
1645     Label DONE_LABEL;
1646     if (UseBiasedLocking) {
1647        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1648     }
1649     // Classic stack-locking code ...
1650     // Check whether the displaced header is 0
1651     //(=> recursive unlock)
1652     movptr(tmpReg, Address(boxReg, 0));
1653     testptr(tmpReg, tmpReg);
1654     jccb(Assembler::zero, DONE_LABEL);
1655     // If not recursive lock, reset the header to displaced header
1656     if (os::is_MP()) {
1657       lock();
1658     }
1659     cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
1660     bind(DONE_LABEL);
1661   } else {
1662     Label DONE_LABEL, Stacked, CheckSucc;
1663 
1664     // Critically, the biased locking test must have precedence over
1665     // and appear before the (box->dhw == 0) recursive stack-lock test.
1666     if (UseBiasedLocking && !UseOptoBiasInlining) {
1667        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1668     }
1669 
1670     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header

1671     movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
1672     jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
1673 
1674     testptr(tmpReg, 0x02);                          // Inflated?
1675     jccb  (Assembler::zero, Stacked);
1676 
1677     // It's inflated.
1678     // Despite our balanced locking property we still check that m->_owner == Self
1679     // as java routines or native JNI code called by this thread might
1680     // have released the lock.
1681     // Refer to the comments in synchronizer.cpp for how we might encode extra
1682     // state in _succ so we can avoid fetching EntryList|cxq.
1683     //
1684     // I'd like to add more cases in fast_lock() and fast_unlock() --
1685     // such as recursive enter and exit -- but we have to be wary of
1686     // I$ bloat, T$ effects and BP$ effects.
1687     //
1688     // If there's no contention try a 1-0 exit.  That is, exit without
1689     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
1690     // we detect and recover from the race that the 1-0 exit admits.
1691     //
1692     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
1693     // before it STs null into _owner, releasing the lock.  Updates
1694     // to data protected by the critical section must be visible before
1695     // we drop the lock (and thus before any other thread could acquire
1696     // the lock and observe the fields protected by the lock).
1697     // IA32's memory-model is SPO, so STs are ordered with respect to


2431   } else {
2432     lea(rscratch1, src);
2433     Assembler::comisd(dst, Address(rscratch1, 0));
2434   }
2435 }
2436 
2437 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
2438   if (reachable(src)) {
2439     Assembler::comiss(dst, as_Address(src));
2440   } else {
2441     lea(rscratch1, src);
2442     Assembler::comiss(dst, Address(rscratch1, 0));
2443   }
2444 }
2445 
2446 
2447 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
2448   Condition negated_cond = negate_condition(cond);
2449   Label L;
2450   jcc(negated_cond, L);

2451   atomic_incl(counter_addr);

2452   bind(L);
2453 }
2454 
2455 int MacroAssembler::corrected_idivl(Register reg) {
2456   // Full implementation of Java idiv and irem; checks for
2457   // special case as described in JVM spec., p.243 & p.271.
2458   // The function returns the (pc) offset of the idivl
2459   // instruction - may be needed for implicit exceptions.
2460   //
2461   //         normal case                           special case
2462   //
2463   // input : rax,: dividend                         min_int
2464   //         reg: divisor   (may not be rax,/rdx)   -1
2465   //
2466   // output: rax,: quotient  (= rax, idiv reg)       min_int
2467   //         rdx: remainder (= rax, irem reg)       0
2468   assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
2469   const int min_int = 0x80000000;
2470   Label normal_case, special_case;
2471 

src/cpu/x86/vm/macroAssembler_x86.cpp (patched version, rev 5968)

 284   if (sign_extension) sarl(hi);
 285   else                shrl(hi);
 286 }
 287 
 288 void MacroAssembler::movoop(Register dst, jobject obj) {
 289   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
 290 }
 291 
 292 void MacroAssembler::movoop(Address dst, jobject obj) {
 293   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
 294 }
 295 
 296 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
 297   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 298 }
 299 
 300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
 301   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 302 }
 303 
 304 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
 305   // scratch register is not used,
 306   // it is defined to match parameters of 64-bit version of this method.
 307   if (src.is_lval()) {
 308     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
 309   } else {
 310     movl(dst, as_Address(src));
 311   }
 312 }
 313 
 314 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
 315   movl(as_Address(dst), src);
 316 }
 317 
 318 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
 319   movl(dst, as_Address(src));
 320 }
 321 
 322 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
 323 void MacroAssembler::movptr(Address dst, intptr_t src) {
 324   movl(dst, src);
 325 }
 326 


 598 
 599   return idivq_offset;
 600 }
 601 
 602 void MacroAssembler::decrementq(Register reg, int value) {
 603   if (value == min_jint) { subq(reg, value); return; }
 604   if (value <  0) { incrementq(reg, -value); return; }
 605   if (value == 0) {                        ; return; }
 606   if (value == 1 && UseIncDec) { decq(reg) ; return; }
 607   /* else */      { subq(reg, value)       ; return; }
 608 }
 609 
 610 void MacroAssembler::decrementq(Address dst, int value) {
 611   if (value == min_jint) { subq(dst, value); return; }
 612   if (value <  0) { incrementq(dst, -value); return; }
 613   if (value == 0) {                        ; return; }
 614   if (value == 1 && UseIncDec) { decq(dst) ; return; }
 615   /* else */      { subq(dst, value)       ; return; }
 616 }
 617 
 618 void MacroAssembler::incrementq(AddressLiteral dst) {
 619   if (reachable(dst)) {
 620     incrementq(as_Address(dst));
 621   } else {
 622     lea(rscratch1, dst);
 623     incrementq(Address(rscratch1, 0));
 624   }
 625 }
 626 
 627 void MacroAssembler::incrementq(Register reg, int value) {
 628   if (value == min_jint) { addq(reg, value); return; }
 629   if (value <  0) { decrementq(reg, -value); return; }
 630   if (value == 0) {                        ; return; }
 631   if (value == 1 && UseIncDec) { incq(reg) ; return; }
 632   /* else */      { addq(reg, value)       ; return; }
 633 }
 634 
 635 void MacroAssembler::incrementq(Address dst, int value) {
 636   if (value == min_jint) { addq(dst, value); return; }
 637   if (value <  0) { decrementq(dst, -value); return; }
 638   if (value == 0) {                        ; return; }
 639   if (value == 1 && UseIncDec) { incq(dst) ; return; }
 640   /* else */      { addq(dst, value)       ; return; }
 641 }
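
Reader note (not part of the patch): the value == min_jint special case in incrementq()/decrementq() above exists because the value < 0 branch negates its argument before cross-calling, and -min_jint (-0x80000000) overflows a 32-bit int; routing that one value straight to addq()/subq() avoids the negation entirely.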
 642 
 643 // 32bit can do a case table jump in one instruction but we no longer allow the base
 644 // to be installed in the Address class
 645 void MacroAssembler::jump(ArrayAddress entry) {
 646   lea(rscratch1, entry.base());


 675 }
 676 
 677 void MacroAssembler::movoop(Register dst, jobject obj) {
 678   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
 679 }
 680 
 681 void MacroAssembler::movoop(Address dst, jobject obj) {
 682   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
 683   movq(dst, rscratch1);
 684 }
 685 
 686 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
 687   mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
 688 }
 689 
 690 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
 691   mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
 692   movq(dst, rscratch1);
 693 }
 694 
 695 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
 696   if (src.is_lval()) {
 697     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
 698   } else {
 699     if (reachable(src)) {
 700       movq(dst, as_Address(src));
 701     } else {
 702       lea(scratch, src);
 703       movq(dst, Address(scratch, 0));
 704     }
 705   }
 706 }
 707 
 708 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
 709   movq(as_Address(dst), src);
 710 }
 711 
 712 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
 713   movq(dst, as_Address(src));
 714 }
 715 
 716 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
 717 void MacroAssembler::movptr(Address dst, intptr_t src) {
 718   mov64(rscratch1, src);
 719   movq(dst, rscratch1);
 720 }
 721 
 722 // These are mostly for initializing NULL
 723 void MacroAssembler::movptr(Address dst, int32_t src) {


 982     lea(rscratch1, src);
 983     Assembler::andpd(dst, Address(rscratch1, 0));
 984   }
 985 }
 986 
 987 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
 988   // Used in sign-masking with aligned address.
 989   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
 990   if (reachable(src)) {
 991     Assembler::andps(dst, as_Address(src));
 992   } else {
 993     lea(rscratch1, src);
 994     Assembler::andps(dst, Address(rscratch1, 0));
 995   }
 996 }
 997 
 998 void MacroAssembler::andptr(Register dst, int32_t imm32) {
 999   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
1000 }
1001 
1002 void MacroAssembler::atomic_incl(Address counter_addr) {
1003   if (os::is_MP())
1004     lock();
1005   incrementl(counter_addr);
1006 }
1007 
1008 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
1009   if (reachable(counter_addr)) {
1010     atomic_incl(as_Address(counter_addr));
1011   } else {
1012     lea(scr, counter_addr);
1013     atomic_incl(Address(scr, 0));
1014   }
1015 }
1016 
1017 #ifdef _LP64
1018 void MacroAssembler::atomic_incq(Address counter_addr) {
1019   if (os::is_MP())
1020     lock();
1021   incrementq(counter_addr);
1022 }
1023 
1024 void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1025   if (reachable(counter_addr)) {
1026     atomic_incq(as_Address(counter_addr));
1027   } else {
1028     lea(scr, counter_addr);
1029     atomic_incq(Address(scr, 0));
1030   }

1031 }
1032 #endif
1033 
1034 // Writes to stack successive pages until offset reached to check for
1035 // stack overflow + shadow pages.  This clobbers tmp.
1036 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1037   movptr(tmp, rsp);
1038   // Bang stack for total size given plus shadow page size.
1039   // Bang one page at a time because large size can bang beyond yellow and
1040   // red zones.
1041   Label loop;
1042   bind(loop);
1043   movl(Address(tmp, (-os::vm_page_size())), size );
1044   subptr(tmp, os::vm_page_size());
1045   subl(size, os::vm_page_size());
1046   jcc(Assembler::greater, loop);
1047 
1048   // Bang down shadow pages too.
1049   // At this point, (tmp-0) is the last address touched, so don't
1050   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1051   // was post-decremented.)  Skip this address by starting at i=1, and
1052   // touch a few more pages below.  N.B.  It is important to touch all


1285 
1286   return null_check_offset;
1287 }
1288 
1289 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1290   assert(UseBiasedLocking, "why call this otherwise?");
1291 
1292   // Check for biased locking unlock case, which is a no-op
1293   // Note: we do not have to check the thread ID for two reasons.
1294   // First, the interpreter checks for IllegalMonitorStateException at
1295   // a higher level. Second, if the bias was revoked while we held the
1296   // lock, the object could not be rebiased toward another thread, so
1297   // the bias bit would be clear.
1298   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1299   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1300   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1301   jcc(Assembler::equal, done);
1302 }
1303 
1304 #ifdef COMPILER2
1305 
1306 #if INCLUDE_RTM_OPT
1307 
1308 // Update rtm_counters based on abort status
1309 // input: abort_status
1310 //        rtm_counters (RTMLockingCounters*)
1311 // flags are killed
1312 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1313 
1314   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1315   if (PrintPreciseRTMLockingStatistics) {
1316     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1317       Label check_abort;
1318       testl(abort_status, (1<<i));
1319       jccb(Assembler::equal, check_abort);
1320       atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1321       bind(check_abort);
1322     }
1323   }
1324 }
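
At runtime the loop above behaves roughly like the following C++. This is a sketch for readers only: the counter layout and ABORT_STATUS_LIMIT come from RTMLockingCounters (not shown in this hunk), uintptr_t stands in for HotSpot's uintx, and the real code bumps the counters with atomic_incptr() rather than a plain increment.

  // Sketch of the effect of rtm_counters_update (illustration only):
  static void rtm_counters_update_sketch(unsigned abort_status,
                                         uintptr_t* abort_count,
                                         uintptr_t abortX_count[],
                                         int abort_status_limit) {
    ++*abort_count;                           // every abort bumps the total
    for (int i = 0; i < abort_status_limit; i++) {
      if (abort_status & (1u << i)) {         // one counter per abort-reason bit in RAX
        ++abortX_count[i];
      }
    }
  }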
1325 
 1326 // Branch if ((random & (count-1)) != 0), count is 2^n
1327 // tmp, scr and flags are killed
1328 void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
1329   assert(tmp == rax, "");
1330   assert(scr == rdx, "");
1331   rdtsc(); // modifies EDX:EAX
1332   andptr(tmp, count-1);
1333   jccb(Assembler::notZero, brLabel);
1334 }
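
The helper above uses the low bits of the time-stamp counter as a cheap pseudo-random source so the statistics counters are only updated on a sample of lock attempts. Roughly (illustration only):

  //   rdtsc();                                      // tmp = EAX = low 32 bits of TSC
  //   if ((tmp & (count - 1)) != 0) goto brLabel;   // taken ~ (count-1)/count of the time
  // With count == RTMTotalCountIncrRate (say 64), the increment that follows the
  // call is executed for roughly 1 in 64 lock attempts; the abort-ratio code
  // below compensates by multiplying total_count by RTMTotalCountIncrRate.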
1335 
1336 // Perform abort ratio calculation, set no_rtm bit if high ratio
1337 // input:  rtm_counters_Reg (RTMLockingCounters* address)
1338 // tmpReg, rtm_counters_Reg and flags are killed
1339 void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
1340                                                  Register rtm_counters_Reg,
1341                                                  RTMLockingCounters* rtm_counters,
1342                                                  Metadata* method_data) {
1343   Label L_done, L_check_always_rtm1, L_check_always_rtm2;
1344 
1345   if (RTMLockingCalculationDelay > 0) {
1346     // Delay calculation
1347     movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
1348     testptr(tmpReg, tmpReg);
1349     jccb(Assembler::equal, L_done);
1350   }
1351   // Abort ratio calculation only if abort_count > RTMAbortThreshold
1352   //   Aborted transactions = abort_count * 100
1353   //   All transactions = total_count *  RTMTotalCountIncrRate
1354   //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
1355 
1356   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
1357   cmpptr(tmpReg, RTMAbortThreshold);
1358   jccb(Assembler::below, L_check_always_rtm2);
1359   imulptr(tmpReg, tmpReg, 100);
1360 
1361   Register scrReg = rtm_counters_Reg;
1362   movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1363   imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
1364   imulptr(scrReg, scrReg, RTMAbortRatio);
1365   cmpptr(tmpReg, scrReg);
1366   jccb(Assembler::below, L_check_always_rtm1);
1367   if (method_data != NULL) {
1368     // set rtm_state to "no rtm" in MDO
1369     mov_metadata(tmpReg, method_data);
1370     if (os::is_MP()) {
1371       lock();
1372     }
1373     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
1374   }
1375   jmpb(L_done);
1376   bind(L_check_always_rtm1);
1377   // Reload RTMLockingCounters* address
1378   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1379   bind(L_check_always_rtm2);
1380   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1381   cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
1382   jccb(Assembler::below, L_done);
1383   if (method_data != NULL) {
1384     // set rtm_state to "always rtm" in MDO
1385     mov_metadata(tmpReg, method_data);
1386     if (os::is_MP()) {
1387       lock();
1388     }
1389     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
1390   }
1391   bind(L_done);
1392 }
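
A worked example of the decision above, with illustrative flag values (not necessarily the defaults) RTMAbortThreshold = 1000, RTMTotalCountIncrRate = 64, RTMAbortRatio = 50:

  //   abort_count = 4000, total_count = 100   (total_count is sampled at ~1/64 of attempts)
  //   abort_count >= RTMAbortThreshold, so the ratio is evaluated:
  //     Aborted = abort_count * 100                   = 400000
  //     All     = total_count * RTMTotalCountIncrRate =   6400
  //     Aborted >= All * RTMAbortRatio  (6400 * 50 = 320000)  ->  true
  //   => the NoRTM bit is set in the MDO and this lock site stops using RTM.
  //   If the ratio test fails instead, control falls through to the "always rtm"
  //   check: once total_count >= RTMLockingThreshold / RTMTotalCountIncrRate,
  //   the UseRTM bit is set in the MDO.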
1393 
1394 // Update counters and perform abort ratio calculation
1395 // input:  abort_status_Reg
1396 // rtm_counters_Reg, flags are killed
1397 void MacroAssembler::rtm_profiling(Register abort_status_Reg,
1398                                    Register rtm_counters_Reg,
1399                                    RTMLockingCounters* rtm_counters,
1400                                    Metadata* method_data,
1401                                    bool profile_rtm) {
1402 
1403   assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1404   // update rtm counters based on rax value at abort
1405   // reads abort_status_Reg, updates flags
1406   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1407   rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
1408   if (profile_rtm) {
1409     // Save abort status because abort_status_Reg is used by following code.
1410     if (RTMRetryCount > 0) {
1411       push(abort_status_Reg);
1412     }
1413     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1414     rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
1415     // restore abort status
1416     if (RTMRetryCount > 0) {
1417       pop(abort_status_Reg);
1418     }
1419   }
1420 }
1421 
1422 // Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
1423 // inputs: retry_count_Reg
1424 //       : abort_status_Reg
1425 // output: retry_count_Reg decremented by 1
1426 // flags are killed
1427 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
1428   Label doneRetry;
1429   assert(abort_status_Reg == rax, "");
1430   // The abort reason bits are in eax (see all states in rtmLocking.hpp)
1431   // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
1432   // if reason is in 0x6 and retry count != 0 then retry
1433   andptr(abort_status_Reg, 0x6);
1434   jccb(Assembler::zero, doneRetry);
1435   testl(retry_count_Reg, retry_count_Reg);
1436   jccb(Assembler::zero, doneRetry);
1437   pause();
1438   decrementl(retry_count_Reg);
1439   jmp(retryLabel);
1440   bind(doneRetry);
1441 }
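
The 0x6 mask above selects the two RTM abort-status bits that indicate a retry may succeed; in the naming used by Intel's <immintrin.h> these are _XABORT_RETRY (1 << 1) and _XABORT_CONFLICT (1 << 2):

  //   bool worth_retrying = (abort_status & (_XABORT_RETRY | _XABORT_CONFLICT)) != 0;
  // The remaining bits (explicit xabort, capacity, debug, nested) are treated
  // as non-retryable and fall through to doneRetry.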
1442 
1443 // Spin and retry if lock is busy,
1444 // inputs: box_Reg (monitor address)
1445 //       : retry_count_Reg
1446 // output: retry_count_Reg decremented by 1
1447 //       : clear z flag if retry count exceeded
1448 // tmp_Reg, scr_Reg, flags are killed
1449 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
1450                                             Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
1451   Label SpinLoop, SpinExit, doneRetry;
1452   // Clean monitor_value bit to get valid pointer
1453   int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
1454 
1455   testl(retry_count_Reg, retry_count_Reg);
1456   jccb(Assembler::zero, doneRetry);
1457   decrementl(retry_count_Reg);
1458   movptr(scr_Reg, RTMSpinLoopCount);
1459 
1460   bind(SpinLoop);
1461   pause();
1462   decrementl(scr_Reg);
1463   jccb(Assembler::lessEqual, SpinExit);
1464   movptr(tmp_Reg, Address(box_Reg, owner_offset));
1465   testptr(tmp_Reg, tmp_Reg);
1466   jccb(Assembler::notZero, SpinLoop);
1467 
1468   bind(SpinExit);
1469   jmp(retryLabel);
1470   bind(doneRetry);
1471   incrementl(retry_count_Reg); // clear z flag
1472 }
1473 
1474 // Use RTM for normal stack locks
1475 // Input: objReg (object to lock)
1476 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1477                                        Register retry_on_abort_count_Reg,
1478                                        RTMLockingCounters* stack_rtm_counters,
1479                                        Metadata* method_data, bool profile_rtm,
1480                                        Label& DONE_LABEL, Label& IsInflated) {
1481   assert(UseRTMForStackLocks, "why call this otherwise?");
1482   assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1483   assert(tmpReg == rax, "");
1484   assert(scrReg == rdx, "");
1485   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1486 
1487   if (RTMRetryCount > 0) {
1488     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1489     bind(L_rtm_retry);
1490   }
1491   if (!UseRTMXendForLockBusy) {
1492     movptr(tmpReg, Address(objReg, 0));
1493     testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1494     jcc(Assembler::notZero, IsInflated);
1495   }
1496   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1497     Label L_noincrement;
1498     if (RTMTotalCountIncrRate > 1) {
1499       // tmpReg, scrReg and flags are killed
1500       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1501     }
1502     assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1503     atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1504     bind(L_noincrement);
1505   }
1506   xbegin(L_on_abort);
1507   movptr(tmpReg, Address(objReg, 0));       // fetch markword
1508   andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1509   cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
1510   jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked
1511 
1512   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1513   if (UseRTMXendForLockBusy) {
1514     xend();
1515     movptr(tmpReg, Address(objReg, 0));
1516     testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1517     jcc(Assembler::notZero, IsInflated);
1518     movptr(abort_status_Reg, 0x1);                // Set the abort status to 1 (as xabort does)
1519     jmp(L_decrement_retry);
1520   }
1521   else {
1522     xabort(0);
1523   }
1524   bind(L_on_abort);
1525   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1526     rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
1527   }
1528   bind(L_decrement_retry);
1529   if (RTMRetryCount > 0) {
1530     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1531     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1532   }
1533 }
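
Control-flow summary of rtm_stack_locking() for readers (a paraphrase, not generated code):

  //   xbegin L_on_abort
  //     if ((obj->mark() & biased_lock_mask) == unlocked_value) goto DONE_LABEL
  //       // lock elided; the transaction stays open and is committed by the
  //       // xend() in fast_unlock()'s RTM path
  //     lock word busy:
  //       UseRTMXendForLockBusy ? (xend, record abort status 0x1) : xabort(0)
  //   L_on_abort: update the RTM profiling counters, then retry up to
  //   RTMRetryCount times; once retries are exhausted, control falls back to
  //   the regular stack-locking / inflated paths in fast_lock().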
1534 
 1535 // Use RTM for inflated locks
1536 // inputs: objReg (object to lock)
1537 //         boxReg (on-stack box address (displaced header location) - KILLED)
1538 //         tmpReg (ObjectMonitor address + 2(monitor_value))
1539 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1540                                           Register scrReg, Register retry_on_busy_count_Reg,
1541                                           Register retry_on_abort_count_Reg,
1542                                           RTMLockingCounters* rtm_counters,
1543                                           Metadata* method_data, bool profile_rtm,
1544                                           Label& DONE_LABEL) {
1545   assert(UseRTMLocking, "why call this otherwise?");
1546   assert(tmpReg == rax, "");
1547   assert(scrReg == rdx, "");
1548   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1549   // Clean monitor_value bit to get valid pointer
1550   int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
1551 
1552   // Without cast to int32_t a movptr will destroy r10 which is typically obj
1553   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1554   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1555 
1556   if (RTMRetryCount > 0) {
1557     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1558     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1559     bind(L_rtm_retry);
1560   }
1561   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1562     Label L_noincrement;
1563     if (RTMTotalCountIncrRate > 1) {
1564       // tmpReg, scrReg and flags are killed
1565       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1566     }
1567     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1568     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1569     bind(L_noincrement);
1570   }
1571   xbegin(L_on_abort);
1572   movptr(tmpReg, Address(objReg, 0));
1573   movptr(tmpReg, Address(tmpReg, owner_offset));
1574   testptr(tmpReg, tmpReg);
1575   jcc(Assembler::zero, DONE_LABEL);
1576   if (UseRTMXendForLockBusy) {
1577     xend();
1578     jmp(L_decrement_retry);
1579   }
1580   else {
1581     xabort(0);
1582   }
1583   bind(L_on_abort);
1584   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1585   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1586     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1587   }
1588   if (RTMRetryCount > 0) {
1589     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1590     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1591   }
1592 
1593   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1594   testptr(tmpReg, tmpReg) ;
1595   jccb(Assembler::notZero, L_decrement_retry) ;
1596 
1597   // Appears unlocked - try to swing _owner from null to non-null.
1598   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1599 #ifdef _LP64
1600   Register threadReg = r15_thread;
1601 #else
1602   get_thread(scrReg);
1603   Register threadReg = scrReg;
1604 #endif
1605   if (os::is_MP()) {
1606     lock();
1607   }
1608   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1609 
1610   if (RTMRetryCount > 0) {
 1611     // success: done, else retry
1612     jccb(Assembler::equal, DONE_LABEL) ;
1613     bind(L_decrement_retry);
1614     // Spin and retry if lock is busy.
1615     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1616   }
1617   else {
1618     bind(L_decrement_retry);
1619   }
1620 }
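
Note on the inflated path above: inside the transaction the code only checks that ObjectMonitor::_owner is NULL and never stores to it, so ownership is held speculatively until the matching unlock commits the transaction with xend(). Only after the transactional retries are exhausted does the code fall back to the real lock cmpxchg of the thread pointer into _owner shown at the end of the function.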
1621 
1622 #endif //  INCLUDE_RTM_OPT
1623 
1624 // Fast_Lock and Fast_Unlock used by C2
1625 
1626 // Because the transitions from emitted code to the runtime
1627 // monitorenter/exit helper stubs are so slow it's critical that
1628 // we inline both the stack-locking fast-path and the inflated fast path.
1629 //
1630 // See also: cmpFastLock and cmpFastUnlock.
1631 //
1632 // What follows is a specialized inline transliteration of the code
1633 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
1634 // another option would be to emit TrySlowEnter and TrySlowExit methods
1635 // at startup-time.  These methods would accept arguments as
1636 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1637 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
1638 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1639 // In practice, however, the # of lock sites is bounded and is usually small.
1640 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
 1641 // if the processor uses simple bimodal branch predictors keyed by EIP,
 1642 // since the helper routines would be called from multiple synchronization
1643 // sites.


1680 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
1681 //    But beware of excessive branch density on AMD Opterons.
1682 //
1683 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
1684 //    or failure of the fast-path.  If the fast-path fails then we pass
1685 //    control to the slow-path, typically in C.  In Fast_Lock and
1686 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
1687 //    will emit a conditional branch immediately after the node.
1688 //    So we have branches to branches and lots of ICC.ZF games.
1689 //    Instead, it might be better to have C2 pass a "FailureLabel"
1690 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
1691 //    will drop through the node.  ICC.ZF is undefined at exit.
1692 //    In the case of failure, the node will branch directly to the
1693 //    FailureLabel
1694 
1695 
1696 // obj: object to lock
1697 // box: on-stack box address (displaced header location) - KILLED
1698 // rax,: tmp -- KILLED
1699 // scr: tmp -- KILLED
1700 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1701                                Register scrReg, Register cx1Reg, Register cx2Reg,
1702                                BiasedLockingCounters* counters,
1703                                RTMLockingCounters* rtm_counters,
1704                                RTMLockingCounters* stack_rtm_counters,
1705                                Metadata* method_data,
1706                                bool use_rtm, bool profile_rtm) {
 1707   // Ensure the register assignments are disjoint
1708   assert(tmpReg == rax, "");
1709 
1710   if (use_rtm) {
1711     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1712   } else {
1713     assert(cx1Reg == noreg, "");
1714     assert(cx2Reg == noreg, "");
1715     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1716   }
1717 
1718   if (counters != NULL) {
1719     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1720   }
1721   if (EmitSync & 1) {
1722       // set box->dhw = unused_mark (3)
1723       // Force all sync thru slow-path: slow_enter() and slow_exit()
1724       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1725       cmpptr (rsp, (int32_t)NULL_WORD);
1726   } else
1727   if (EmitSync & 2) {
1728       Label DONE_LABEL ;
1729       if (UseBiasedLocking) {
1730          // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
1731          biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1732       }
1733 
1734       movptr(tmpReg, Address(objReg, 0));           // fetch markword
1735       orptr (tmpReg, 0x1);
1736       movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
1737       if (os::is_MP()) {
1738         lock();
1739       }


1758     // * neutral
1759     // * stack-locked
1760     //    -- by self
1761     //       = sp-proximity test hits
1762     //       = sp-proximity test generates false-negative
1763     //    -- by other
1764     //
1765 
1766     Label IsInflated, DONE_LABEL;
1767 
1768     // it's stack-locked, biased or neutral
1769     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1770     // order to reduce the number of conditional branches in the most common cases.
1771     // Beware -- there's a subtle invariant that fetch of the markword
1772     // at [FETCH], below, will never observe a biased encoding (*101b).
1773     // If this invariant is not held we risk exclusion (safety) failure.
1774     if (UseBiasedLocking && !UseOptoBiasInlining) {
1775       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
1776     }
1777 
1778 #if INCLUDE_RTM_OPT
1779     if (UseRTMForStackLocks && use_rtm) {
1780       rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1781                         stack_rtm_counters, method_data, profile_rtm,
1782                         DONE_LABEL, IsInflated);
1783     }
1784 #endif // INCLUDE_RTM_OPT
1785 
1786     movptr(tmpReg, Address(objReg, 0));          // [FETCH]
1787     testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1788     jccb(Assembler::notZero, IsInflated);
1789 
1790     // Attempt stack-locking ...
1791     orptr (tmpReg, markOopDesc::unlocked_value);
1792     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
1793     if (os::is_MP()) {
1794       lock();
1795     }
1796     cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
1797     if (counters != NULL) {
1798       cond_inc32(Assembler::equal,
1799                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1800     }
1801     jcc(Assembler::equal, DONE_LABEL);           // Success
1802 
1803     // Recursive locking.
1804     // The object is stack-locked: markword contains stack pointer to BasicLock.
1805     // Locked by current thread if difference with current SP is less than one page.
1806     subptr(tmpReg, rsp);
 1807     // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
1808     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1809     movptr(Address(boxReg, 0), tmpReg);
1810     if (counters != NULL) {
1811       cond_inc32(Assembler::equal,
1812                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1813     }
1814     jmp(DONE_LABEL);
1815 
1816     bind(IsInflated);
1817     // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
1818 
1819 #if INCLUDE_RTM_OPT
1820     // Use the same RTM locking code in 32- and 64-bit VM.
1821     if (use_rtm) {
1822       rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
1823                            rtm_counters, method_data, profile_rtm, DONE_LABEL);
1824     } else {
1825 #endif // INCLUDE_RTM_OPT
1826 
1827 #ifndef _LP64
1828     // The object is inflated.
1829     //
1830     // TODO-FIXME: eliminate the ugly use of manifest constants:
1831     //   Use markOopDesc::monitor_value instead of "2".
1832     //   use markOop::unused_mark() instead of "3".
1833     // The tmpReg value is an objectMonitor reference ORed with
1834     // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
1835     // objectmonitor pointer by masking off the "2" bit or we can just
1836     // use tmpReg as an objectmonitor pointer but bias the objectmonitor
1837     // field offsets with "-2" to compensate for and annul the low-order tag bit.
1838     //
1839     // I use the latter as it avoids AGI stalls.
1840     // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
1841     // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
1842     //
1843     #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
1844 
1845     // boxReg refers to the on-stack BasicLock in the current frame.
1846     // We'd like to write:


1936        // If the CAS fails we can either retry or pass control to the slow-path.
1937        // We use the latter tactic.
1938        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1939        // If the CAS was successful ...
1940        //   Self has acquired the lock
1941        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1942        // Intentional fall-through into DONE_LABEL ...
1943     }
1944 #else // _LP64
1945     // It's inflated
1946 
1947     // TODO: someday avoid the ST-before-CAS penalty by
1948     // relocating (deferring) the following ST.
1949     // We should also think about trying a CAS without having
1950     // fetched _owner.  If the CAS is successful we may
1951     // avoid an RTO->RTS upgrade on the $line.
1952 
1953     // Without cast to int32_t a movptr will destroy r10 which is typically obj
1954     movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1955 
1956     movptr (boxReg, tmpReg);
1957     movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1958     testptr(tmpReg, tmpReg);
1959     jccb   (Assembler::notZero, DONE_LABEL);
1960 
1961     // It's inflated and appears unlocked
1962     if (os::is_MP()) {
1963       lock();
1964     }
1965     cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1966     // Intentional fall-through into DONE_LABEL ...
1967 #endif // _LP64
1968 
1969 #if INCLUDE_RTM_OPT
1970     } // use_rtm()
1971 #endif

1972     // DONE_LABEL is a hot target - we'd really like to place it at the
1973     // start of cache line by padding with NOPs.
1974     // See the AMD and Intel software optimization manuals for the
1975     // most efficient "long" NOP encodings.
1976     // Unfortunately none of our alignment mechanisms suffice.
1977     bind(DONE_LABEL);
1978 
1979     // At DONE_LABEL the icc ZFlag is set as follows ...
1980     // Fast_Unlock uses the same protocol.
1981     // ZFlag == 1 -> Success
1982     // ZFlag == 0 -> Failure - force control through the slow-path
1983   }
1984 }
1985 
1986 // obj: object to unlock
1987 // box: box address (displaced header location), killed.  Must be EAX.
1988 // tmp: killed, cannot be obj nor box.
1989 //
1990 // Some commentary on balanced locking:
1991 //


1993 // Methods that don't have provably balanced locking are forced to run in the
1994 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
1995 // The interpreter provides two properties:
1996 // I1:  At return-time the interpreter automatically and quietly unlocks any
1997 //      objects acquired by the current activation (frame).  Recall that the
1998 //      interpreter maintains an on-stack list of locks currently held by
1999 //      a frame.
2000 // I2:  If a method attempts to unlock an object that is not held by the
2001 //      frame, the interpreter throws IMSX.
2002 //
2003 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
2004 // B() doesn't have provably balanced locking so it runs in the interpreter.
2005 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
2006 // is still locked by A().
2007 //
2008 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
2009 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
2010 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
2011 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
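// A small sketch of the scenario above (shapes are illustrative only):
//
//   A():  monitorenter O     // compiled, provably balanced -> uses fast_lock
//         invoke B()         // B() not provably balanced -> runs interpreted
//         monitorexit  O     // by I1/I2, O is still locked by A() here,
//                            // so fast_unlock pairs correctly with fast_lock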
2012 
2013 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
2014   assert(boxReg == rax, "");
2015   assert_different_registers(objReg, boxReg, tmpReg);


2016 
2017   if (EmitSync & 4) {
2018     // Disable - inhibit all inlining.  Force control through the slow-path
2019     cmpptr (rsp, 0);
2020   } else
2021   if (EmitSync & 8) {
2022     Label DONE_LABEL;
2023     if (UseBiasedLocking) {
2024        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2025     }
2026     // Classic stack-locking code ...
2027     // Check whether the displaced header is 0
2028     // (=> recursive unlock)
2029     movptr(tmpReg, Address(boxReg, 0));
2030     testptr(tmpReg, tmpReg);
2031     jccb(Assembler::zero, DONE_LABEL);
2032     // If not recursive lock, reset the header to displaced header
2033     if (os::is_MP()) {
2034       lock();
2035     }
2036     cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
2037     bind(DONE_LABEL);
2038   } else {
2039     Label DONE_LABEL, Stacked, CheckSucc;
2040 
2041     // Critically, the biased locking test must have precedence over
2042     // and appear before the (box->dhw == 0) recursive stack-lock test.
2043     if (UseBiasedLocking && !UseOptoBiasInlining) {
2044        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
2045     }
2046 
2047 #if INCLUDE_RTM_OPT
2048     if (UseRTMForStackLocks && use_rtm) {
2049       assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2050       Label L_regular_unlock;
2051       movptr(tmpReg, Address(objReg, 0));           // fetch markword
2052       andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2053       cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
2054       jccb(Assembler::notEqual, L_regular_unlock);  // not speculatively RTM-locked -> regular unlock
2055       xend();                                       // otherwise end...
2056       jmp(DONE_LABEL);                              // ... and we're done
2057       bind(L_regular_unlock);
2058     }
2059 #endif
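    // For reference, a sketch of the markword low-bit states the RTM check above
    // relies on (see markOop.hpp; only the relevant encodings are shown):
    //
    //   [ptr             | 10]  monitor   (inflated lock)
    //   [header      | 0 | 01]  unlocked  (regular object header)
    //   [JavaThread* | 1 | 01]  biased
    //
    // A speculative RTM stack lock leaves the markword unchanged, so finding the
    // "unlocked" pattern, i.e. (mark & biased_lock_mask_in_place) == unlocked_value,
    // means the lock was taken transactionally and xend() alone suffices.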
2060 
2061     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
2062     jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
2063     movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
2064     testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?


2065     jccb  (Assembler::zero, Stacked);
2066 
2067     // It's inflated.
2068 #if INCLUDE_RTM_OPT
2069     if (use_rtm) {
2070       Label L_regular_inflated_unlock;
2071       // Fold the monitor_value tag into the displacement to address the ObjectMonitor fields
2072       int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
2073       movptr(boxReg, Address(tmpReg, owner_offset));
2074       testptr(boxReg, boxReg);
2075       jccb(Assembler::notZero, L_regular_inflated_unlock);
2076       xend();
2077       jmpb(DONE_LABEL);
2078       bind(L_regular_inflated_unlock);
2079     }
2080 #endif
2081 
2082     // Despite our balanced locking property we still check that m->_owner == Self
2083     // as java routines or native JNI code called by this thread might
2084     // have released the lock.
2085     // Refer to the comments in synchronizer.cpp for how we might encode extra
2086     // state in _succ so we can avoid fetching EntryList|cxq.
2087     //
2088     // I'd like to add more cases in fast_lock() and fast_unlock() --
2089     // such as recursive enter and exit -- but we have to be wary of
2090     // I$ bloat, T$ effects and BP$ effects.
2091     //
2092     // If there's no contention try a 1-0 exit.  That is, exit without
2093     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
2094     // we detect and recover from the race that the 1-0 exit admits.
2095     //
2096     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2097     // before it STs null into _owner, releasing the lock.  Updates
2098     // to data protected by the critical section must be visible before
2099     // we drop the lock (and thus before any other thread could acquire
2100     // the lock and observe the fields protected by the lock).
2101     // IA32's memory-model is SPO, so STs are ordered with respect to


2835   } else {
2836     lea(rscratch1, src);
2837     Assembler::comisd(dst, Address(rscratch1, 0));
2838   }
2839 }
2840 
2841 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
2842   if (reachable(src)) {
2843     Assembler::comiss(dst, as_Address(src));
2844   } else {
2845     lea(rscratch1, src);
2846     Assembler::comiss(dst, Address(rscratch1, 0));
2847   }
2848 }
2849 
2850 
2851 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
2852   Condition negated_cond = negate_condition(cond);
2853   Label L;
2854   jcc(negated_cond, L);
2855   pushf(); // Preserve flags
2856   atomic_incl(counter_addr);
2857   popf();
2858   bind(L);
2859 }
2860 
2861 int MacroAssembler::corrected_idivl(Register reg) {
2862   // Full implementation of Java idiv and irem; checks for
2863   // special case as described in JVM spec., p.243 & p.271.
2864   // The function returns the (pc) offset of the idivl
2865   // instruction - may be needed for implicit exceptions.
2866   //
2867   //         normal case                           special case
2868   //
2869   // input : rax: dividend                          min_int
2870   //         reg: divisor   (may not be rax/rdx)    -1
2871   //
2872   // output: rax: quotient  (= rax idiv reg)        min_int
2873   //         rdx: remainder (= rax irem reg)        0
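  // Sketch of why the special case exists: idivl raises #DE whenever the
  // quotient does not fit in 32 bits, and min_int / -1 == 2^31 is exactly that
  // case; so the code tests for (rax == min_int && reg == -1), skips the idivl,
  // and yields the JVM-specified result (quotient min_int, remainder 0) directly.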
2874   assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
2875   const int min_int = 0x80000000;
2876   Label normal_case, special_case;
2877 

