src/cpu/aarch64/vm/aarch64.ad


rev 10222 : 8087341: C2 doesn't optimize redundant memory operations with G1
Summary: effect of memory barrier in post barrier is too wide
Reviewed-by: kvn, aph
Contributed-by: adinn <adinn@redhat.com>


1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036 
1037   MemBarNode *parent_membar(const Node *n);
1038   MemBarNode *child_membar(const MemBarNode *n);
1039   bool leading_membar(const MemBarNode *barrier);
1040 
1041   bool is_card_mark_membar(const MemBarNode *barrier);
1042   bool is_CAS(int opcode);
1043 
1044   MemBarNode *leading_to_normal(MemBarNode *leading);
1045   MemBarNode *normal_to_leading(const MemBarNode *barrier);
1046   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1047   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1048   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1049 
1050   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1051 
1052   bool unnecessary_acquire(const Node *barrier);
1053   bool needs_acquiring_load(const Node *load);
1054 
1055   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1056 
1057   bool unnecessary_release(const Node *barrier);
1058   bool unnecessary_volatile(const Node *barrier);
1059   bool needs_releasing_store(const Node *store);
1060 
1061   // predicate controlling translation of CompareAndSwapX
1062   bool needs_acquiring_load_exclusive(const Node *load);
1063 
1064   // predicate controlling translation of StoreCM
1065   bool unnecessary_storestore(const Node *storecm);
1066 %}
1067 


1405 
1406     return false;
1407   }
1408 
1409 
1410   // 3) helper predicates to traverse volatile put or CAS graphs which
1411   // may contain GC barrier subgraphs
1412 
1413   // Preamble
1414   // --------
1415   //
1416   // for volatile writes we can omit generating barriers and employ a
1417   // releasing store when we see a node sequence with a
1418   // leading MemBarRelease and a trailing MemBarVolatile as follows
1419   //
1420   //   MemBarRelease
1421   //  {      ||      } -- optional
1422   //  {MemBarCPUOrder}
1423   //         ||     \\
1424   //         ||     StoreX[mo_release]
1425   //         | \     /
1426   //         | MergeMem
1427   //         | /
1428   //   MemBarVolatile
1429   //
1430   // where
1431   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1432   //  | \ and / indicate further routing of the Ctl and Mem feeds
1433   //
1434   // this is the graph we see for non-object stores. however, for a
1435   // volatile Object store (StoreN/P) we may see other nodes below the
1436   // leading membar because of the need for a GC pre- or post-write
1437   // barrier.





1438   //
1439   // with most GC configurations we will see this simple variant which
1440   // includes a post-write barrier card mark.
1441   //
1442   //   MemBarRelease______________________________
1443   //         ||    \\               Ctl \        \\
1444   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1445   //         | \     /                       . . .  /
1446   //         | MergeMem
1447   //         | /
1448   //         ||      /
1449   //   MemBarVolatile
1450   //
1451   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1452   // the object address to an int used to compute the card offset) and
1453   // Ctl+Mem to a StoreB node (which does the actual card mark).
1454   //
1455   // n.b. a StoreCM node will only appear in this configuration when
1456   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1457   // because it implies a requirement to order visibility of the card
1458   // mark (StoreCM) relative to the object put (StoreP/N) using a
1459   // StoreStore memory barrier (arguably this ought to be represented
1460   // explicitly in the ideal graph but that is not how it works). This
1461   // ordering is required for both non-volatile and volatile
1462   // puts. Normally that means we need to translate a StoreCM using
1463   // the sequence





1464   //
1465   //   dmb ishst
1466   //   stlrb
1467   //
1468   // However, in the case of a volatile put if we can recognise this
1469   // configuration and plant an stlr for the object write then we can
1470   // omit the dmb and just plant an strb since visibility of the stlr
1471   // is ordered before visibility of subsequent stores. StoreCM nodes
1472   // also arise when using G1 or using CMS with conditional card
1473   // marking. In these cases (as we shall see) we don't need to insert
1474   // the dmb when translating StoreCM because there is already an
1475   // intervening StoreLoad barrier between it and the StoreP/N.
1476   //
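  // As a minimal, purely illustrative sketch (only the
  // unnecessary_storestore predicate comes from the declarations above;
  // the emission code shown is an assumption), the translation decision
  // for a StoreCM amounts to
  //
  //   if (!unnecessary_storestore(storecm)) {
  //     __ membar(Assembler::StoreStore);   // dmb ishst
  //   }
  //   ... then emit the card write itself ...
  //
  // i.e. the dmb is only planted when the predicate does not fire.
  //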
1477   // It is also possible to perform the card mark conditionally on it
1478   // currently being unmarked in which case the volatile put graph
1479   // will look slightly different
1480   //
1481   //   MemBarRelease____________________________________________
1482   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1483   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1484   //         | \     /                              \            |
1485   //         | MergeMem                            . . .      StoreB
1486   //         | /                                                /
1487   //         ||     /
1488   //   MemBarVolatile
1489   //
1490   // It is worth noting at this stage that both the above
1491   // configurations can be uniquely identified by checking that the
1492   // memory flow includes the following subgraph:
1493   //
1494   //   MemBarRelease
1495   //  {MemBarCPUOrder}
1496   //          |  \      . . .
1497   //          |  StoreX[mo_release]  . . .
1498   //          |   /
1499   //         MergeMem
1500   //          |
1501   //   MemBarVolatile
1502   //
1503   // This is referred to as a *normal* subgraph. It can easily be
1504   // detected starting from any candidate MemBarRelease,
1505   // StoreX[mo_release] or MemBarVolatile.
1506   //
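  // For example (an illustrative use of the normal_to_leading helper
  // declared above, not the actual detection code), starting from a
  // candidate trailing MemBarVolatile the check amounts to
  //
  //   MemBarNode *leading = normal_to_leading(candidate);
  //   bool is_normal_put = (leading != NULL);
  //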
1507   // A simple variation on this normal case occurs for an unsafe CAS
1508   // operation. The basic graph for a non-object CAS is

1509   //
1510   //   MemBarRelease
1511   //         ||
1512   //   MemBarCPUOrder
1513   //         ||     \\   . . .
1514   //         ||     CompareAndSwapX
1515   //         ||       |
1516   //         ||     SCMemProj
1517   //         | \     /
1518   //         | MergeMem
1519   //         | /
1520   //   MemBarCPUOrder
1521   //         ||
1522   //   MemBarAcquire
1523   //
1524   // The same basic variations on this arrangement (mutatis mutandis)
1525   // occur when a card mark is introduced. i.e. we see the same basic
1526   // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527   // tail of the graph is a pair comprising a MemBarCPUOrder +
1528   // MemBarAcquire.
1529   //
1530   // So, in the case of a CAS the normal graph has the variant form
1531   //
1532   //   MemBarRelease
1533   //   MemBarCPUOrder
1534   //          |   \      . . .
1535   //          |  CompareAndSwapX  . . .
1536   //          |    |
1537   //          |   SCMemProj
1538   //          |   /  . . .
1539   //         MergeMem
1540   //          |
1541   //   MemBarCPUOrder
1542   //   MemBarAcquire
1543   //
1544   // This graph can also easily be detected starting from any
1545   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546   //
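  // For example (again only an illustration built on the helpers
  // declared above), starting from a candidate leading membar the
  // simple CAS form can be confirmed with
  //
  //   MemBarNode *trailing = leading_to_normal(leading);
  //   bool is_cas_graph = (trailing != NULL &&
  //                        trailing->Opcode() == Op_MemBarAcquire);
  //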
1547   // the code below uses two helper predicates, leading_to_normal and
1548   // normal_to_leading to identify these normal graphs, one validating
1549   // the layout starting from the top membar and searching down and
1550   // the other validating the layout starting from the lower membar
1551   // and searching up.
1552   //
1553   // There are two special case GC configurations when a normal graph
1554   // may not be generated: when using G1 (which always employs a
1555   // conditional card mark); and when using CMS with conditional card
1556   // marking configured. These GCs are both concurrent rather than
1557   // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1558   // graph between the leading and trailing membar nodes, in
1559   // particular enforcing stronger memory serialisation between the
1560   // object put and the corresponding conditional card mark. CMS
1561   // employs a post-write GC barrier while G1 employs both a pre- and
1562   // post-write GC barrier. Of course the extra nodes may be absent --
1563   // they are only inserted for object puts. This significantly
1564   // complicates the task of identifying whether a MemBarRelease,
1565   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566   // when using these GC configurations (see below). It adds similar
1567   // complexity to the task of identifying whether a MemBarRelease,
1568   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569   //
1570   // In both cases the post-write subtree includes an auxiliary
1571   // MemBarVolatile (StoreLoad barrier) separating the object put and
1572   // the read of the corresponding card. This poses two additional
1573   // problems.
1574   //
1575   // Firstly, a card mark MemBarVolatile needs to be distinguished
1576   // from a normal trailing MemBarVolatile. Resolving this first
1577   // problem is straightforward: a card mark MemBarVolatile always
1578   // projects a Mem feed to a StoreCM node and that is a unique marker













1579   //
1580   //      MemBarVolatile (card mark)
1581   //       C |    \     . . .
1582   //         |   StoreCM   . . .
1583   //       . . .
1584   //
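  // A minimal sketch of such a check (illustrative only -- the real
  // test is the is_card_mark_membar helper declared above) is
  //
  //   bool is_card_mark_membar_sketch(const MemBarNode *barrier)
  //   {
  //     if (barrier->Opcode() != Op_MemBarVolatile) {
  //       return false;
  //     }
  //     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
  //     if (!mem) {
  //       return false;
  //     }
  //     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
  //       if (mem->fast_out(i)->Opcode() == Op_StoreCM) {
  //         return true;
  //       }
  //     }
  //     return false;
  //   }
  //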
1585   // The second problem is how the code generator is to translate the
1586   // card mark barrier. It always needs to be translated to a "dmb
1587   // ish" instruction whether or not it occurs as part of a volatile
1588   // put. A StoreLoad barrier is needed after the object put to ensure
1589   // i) visibility to GC threads of the object put and ii) visibility
1590   // to the mutator thread of any card clearing write by a GC
1591   // thread. Clearly a normal store (str) will not guarantee this
1592   // ordering but neither will a releasing store (stlr). The latter
1593   // guarantees that the object put is visible but does not guarantee
1594   // that writes by other threads have also been observed.
1595   //
1596   // So, returning to the task of translating the object put and the
1597   // leading/trailing membar nodes: what do the non-normal node graphs
1598   // look like for these 2 special cases? and how can we determine the
1599   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600   // in both normal and non-normal cases?
1601   //
1602   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1603   // which selects conditional execution based on the value loaded
1604   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605   // intervening StoreLoad barrier (MemBarVolatile).
1606   //
1607   // So, with CMS we may see a node graph for a volatile object store
1608   // which looks like this
1609   //
1610   //   MemBarRelease
1611   //   MemBarCPUOrder_(leading)__________________
1612   //     C |    M \       \\                   C \
1613   //       |       \    StoreN/P[mo_release]  CastP2X
1614   //       |    Bot \    /
1615   //       |       MergeMem
1616   //       |         /
1617   //      MemBarVolatile (card mark)
1618   //     C |  ||    M |
1619   //       | LoadB    |
1620   //       |   |      |
1621   //       | Cmp      |\
1622   //       | /        | \
1623   //       If         |  \
1624   //       | \        |   \
1625   // IfFalse  IfTrue  |    \
1626   //       \     / \  |     \
1627   //        \   / StoreCM    |
1628   //         \ /      |      |
1629   //        Region   . . .   |
1630   //          | \           /
1631   //          |  . . .  \  / Bot
1632   //          |       MergeMem
1633   //          |          |
1634   //        MemBarVolatile (trailing)
1635   //
1636   // The first MergeMem merges the AliasIdxBot Mem slice from the
1637   // leading membar and the oopptr Mem slice from the Store into the
1638   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639   // Mem slice from the card mark membar and the AliasIdxRaw slice
1640   // from the StoreCM into the trailing membar (n.b. the latter
1641   // proceeds via a Phi associated with the If region).
1642   //
1643   // The graph for a CAS varies slightly, the obvious difference being
1644   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646   // MemBarAcquire pair. The other important difference is that the
1647   // CompareAndSwap node's SCMemProj is not merged into the card mark
1648   // membar - it still feeds the trailing MergeMem. This also means
1649   // that the card mark membar receives its Mem feed directly from the
1650   // leading membar rather than via a MergeMem.










1651   //
1652   //   MemBarRelease
1653   //   MemBarCPUOrder__(leading)_________________________
1654   //       ||                       \\                 C \
1655   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X







1656   //     C |  ||    M |              |
1657   //       | LoadB    |       ______/|
1658   //       |   |      |      /       |
1659   //       | Cmp      |     /      SCMemProj
1660   //       | /        |    /         |
1661   //       If         |   /         /
1662   //       | \        |  /         /
1663   // IfFalse  IfTrue  | /         /
1664   //       \     / \  |/ prec    /
1665   //        \   / StoreCM       /
1666   //         \ /      |        /
1667   //        Region   . . .    /
1668   //          | \            /
1669   //          |  . . .  \   / Bot
1670   //          |       MergeMem
1671   //          |          |
1672   //        MemBarCPUOrder
1673   //        MemBarAcquire (trailing)
1674   //
1675   // This has a slightly different memory subgraph to the one seen
1676   // previously but the core of it is the same as for the CAS normal
1677   // subgraph
1678   //
1679   //   MemBarRelease
1680   //   MemBarCPUOrder____
1681   //      ||             \      . . .
1682   //   MemBarVolatile  CompareAndSwapX  . . .
1683   //      |  \            |









1684   //        . . .   SCMemProj
1685   //          |     /  . . .
1686   //         MergeMem
1687   //          |
1688   //   MemBarCPUOrder
1689   //   MemBarAcquire
1690   //
1691   //
1692   // G1 is quite a lot more complicated. The nodes inserted on behalf
1693   // of G1 may comprise: a pre-write graph which adds the old value to
1694   // the SATB queue; the releasing store itself; and, finally, a
1695   // post-write graph which performs a card mark.
1696   //
1697   // The pre-write graph may be omitted, but only when the put is
1698   // writing to a newly allocated (young gen) object and then only if
1699   // there is a direct memory chain to the Initialize node for the
1700   // object allocation. This will not happen for a volatile put since
1701   // any memory chain passes through the leading membar.
1702   //
1703   // The pre-write graph includes a series of 3 If tests. The outermost
1704   // If tests whether SATB is enabled (no else case). The next If tests
1705   // whether the old value is non-NULL (no else case). The third tests
1706   // whether the SATB queue index is > 0, if so updating the queue. The
1707   // else case for this third If calls out to the runtime to allocate a
1708   // new queue buffer.
1709   //
1710   // So with G1 the pre-write and releasing store subgraph looks like
1711   // this (the nested Ifs are omitted).
1712   //
1713   //  MemBarRelease (leading)____________
1714   //     C |  ||  M \   M \    M \  M \ . . .
1715   //       | LoadB   \  LoadL  LoadN   \
1716   //       | /        \                 \
1717   //       If         |\                 \
1718   //       | \        | \                 \
1719   //  IfFalse  IfTrue |  \                 \
1720   //       |     |    |   \                 |
1721   //       |     If   |   /\                |
1722   //       |     |          \               |
1723   //       |                 \              |
1724   //       |    . . .         \             |
1725   //       | /       | /       |            |
1726   //      Region  Phi[M]       |            |
1727   //       | \       |         |            |
1728   //       |  \_____ | ___     |            |
1729   //     C | C \     |   C \ M |            |
1730   //       | CastP2X | StoreN/P[mo_release] |
1731   //       |         |         |            |
1732   //     C |       M |       M |          M |
1733   //        \        |         |           /
1734   //                  . . .
1735   //          (post write subtree elided)
1736   //                    . . .
1737   //             C \         M /
1738   //         MemBarVolatile (trailing)
1739   //






1740   // n.b. the LoadB in this subgraph is not the card read -- it's a
1741   // read of the SATB queue active flag.
1742   //
1743   // Once again the CAS graph is a minor variant on the above with the
1744   // expected substitutions of CompareAndSwapX for StoreN/P and
1745   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.





























1746   //
1747   // The G1 post-write subtree is also optional, this time when the
1748   // new value being written is either null or can be identified as a
1749   // newly allocated (young gen) object with no intervening control
1750   // flow. The latter cannot happen but the former may, in which case
1751   // the card mark membar is omitted and the memory feeds from the
1752   // leading membar and the StoreN/P are merged directly into the
1753   // trailing membar as per the normal subgraph. So, the only special
1754   // case which arises is when the post-write subgraph is generated.
1755   //
1756   // The kernel of the post-write G1 subgraph is the card mark itself
1757   // which includes a card mark memory barrier (MemBarVolatile), a
1758   // card test (LoadB), and a conditional update (If feeding a
1759   // StoreCM). These nodes are surrounded by a series of nested Ifs
1760   // which try to avoid doing the card mark. The top level If skips if
1761   // the object reference does not cross regions (i.e. it tests if
1762   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763   // need not be recorded. The next If, which skips on a NULL value,
1764   // may be absent (it is not generated if the type of value is >=
1765   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766   // checking if card_val != young).  n.b. although this test requires
1767   // a pre-read of the card it can safely be done before the StoreLoad
1768   // barrier. However that does not bypass the need to reread the card
1769   // after the barrier.
1770   //
1771   //                (pre-write subtree elided)
1772   //        . . .                  . . .    . . .  . . .
1773   //        C |                    M |     M |    M |
1774   //       Region                  Phi[M] StoreN    |
1775   //          |                     / \      |      |
1776   //         / \_______            /   \     |      |
1777   //      C / C \      . . .            \    |      |
1778   //       If   CastP2X . . .            |   |      |
1779   //       / \                           |   |      |
1780   //      /   \                          |   |      |
1781   // IfFalse IfTrue                      |   |      |
1782   //   |       |                         |   |     /|
1783   //   |       If                        |   |    / |
1784   //   |      / \                        |   |   /  |
1785   //   |     /   \                        \  |  /   |
1786   //   | IfFalse IfTrue                   MergeMem  |
1787   //   |  . . .    / \                       /      |
1788   //   |          /   \                     /       |
1789   //   |     IfFalse IfTrue                /        |
1790   //   |      . . .    |                  /         |
1791   //   |               If                /          |
1792   //   |               / \              /           |
1793   //   |              /   \            /            |
1794   //   |         IfFalse IfTrue       /             |
1795   //   |           . . .   |         /              |
1796   //   |                    \       /               |
1797   //   |                     \     /                |
1798   //   |             MemBarVolatile__(card mark)    |
1799   //   |                ||   C |  M \  M \          |
1800   //   |               LoadB   If    |    |         |
1801   //   |                      / \    |    |         |
1802   //   |                     . . .   |    |         |
1803   //   |                          \  |    |        /
1804   //   |                        StoreCM   |       /
1805   //   |                          . . .   |      /
1806   //   |                        _________/      /
1807   //   |                       /  _____________/
1808   //   |   . . .       . . .  |  /            /
1809   //   |    |                 | /   _________/
1810   //   |    |               Phi[M] /        /
1811   //   |    |                 |   /        /


1812   //   |    |                 |  /        /
1813   //   |  Region  . . .     Phi[M]  _____/
1814   //   |    /                 |    /
1815   //   |                      |   /
1816   //   | . . .   . . .        |  /
1817   //   | /                    | /
1818   // Region           |  |  Phi[M]
1819   //   |              |  |  / Bot
1820   //    \            MergeMem
1821   //     \            /
1822   //     MemBarVolatile
1823   //
1824   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825   // from the leading membar and the oopptr Mem slice from the Store
1826   // into the card mark membar i.e. the memory flow to the card mark
1827   // membar still looks like a normal graph.
1828   //
1829   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830   // Mem slices (from the StoreCM and other card mark queue stores).
1831   // However in this case the AliasIdxBot Mem slice does not come
1832   // direct from the card mark membar. It is merged through a series
1833   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834   // from the leading membar with the Mem feed from the card mark
1835   // membar. Each Phi corresponds to one of the Ifs which may skip
1836   // around the card mark membar. So when the If implementing the NULL
1837   // value check has been elided the total number of Phis is 2
1838   // otherwise it is 3.
1839   //
1840   // The CAS graph when using G1GC also includes a pre-write subgraph
1841   // and an optional post-write subgraph. The same variations are
1842   // introduced as for CMS with conditional card marking i.e. the
1843   // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845   // Mem feed from the CompareAndSwapP/N includes a precedence
1846   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847   // trailing membar. So, as before the configuration includes the
1848   // normal CAS graph as a subgraph of the memory flow.
1849   //
1850   // So, the upshot is that in all cases the volatile put graph will
1851   // include a *normal* memory subgraph between the leading membar and
1852   // its child membar, either a volatile put graph (including a
1853   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854   // When that child is not a card mark membar then it marks the end
1855   // of the volatile put or CAS subgraph. If the child is a card mark
1856   // membar then the normal subgraph will form part of a volatile put
1857   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858   // to a trailing barrier via a MergeMem. That feed is either direct
1859   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860   // memory flow (for G1).












1861   //
1862   // The predicates controlling generation of instructions for store
1863   // and barrier nodes employ a few simple helper functions (described
1864   // below) which identify the presence or absence of all these
1865   // subgraph configurations and provide a means of traversing from
1866   // one node in the subgraph to another.
1867 
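  // As an aside, these predicates are consumed by match rules elsewhere
  // in this file which guard alternative encodings. The following is
  // only a sketch of that usage (the rule name and operand signature are
  // illustrative, not an actual rule):
  //
  //   instruct loadI_acq(iRegINoSp dst, memory mem)
  //   %{
  //     match(Set dst (LoadI mem));
  //     predicate(needs_acquiring_load(n));
  //     // ... encode using ldar rather than ldr ...
  //   %}
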
1868   // is_CAS(int opcode)
1869   //
1870   // return true if opcode is one of the possible CompareAndSwapX
1871   // values otherwise false.
1872 
1873   bool is_CAS(int opcode)
1874   {
1875     return (opcode == Op_CompareAndSwapI ||
1876             opcode == Op_CompareAndSwapL ||
1877             opcode == Op_CompareAndSwapN ||
1878             opcode == Op_CompareAndSwapP);
1879   }
1880 
1881   // leading_to_normal
1882   //
1883   // graph traversal helper which detects the normal case Mem feed from
1884   // a release membar (or, optionally, its cpuorder child) to a
1885   // dependent volatile membar i.e. it ensures that one or other of
1886   // the following Mem flow subgraphs is present.
1887   //
1888   //   MemBarRelease
1889   //   MemBarCPUOrder {leading}
1890   //          |  \      . . .
1891   //          |  StoreN/P[mo_release]  . . .
1892   //          |   /
1893   //         MergeMem
1894   //          |
1895   //   MemBarVolatile {trailing or card mark}
1896   //
1897   //   MemBarRelease
1898   //   MemBarCPUOrder {leading}
1899   //      |       \      . . .
1900   //      |     CompareAndSwapX  . . .
1901   //               |
1902   //     . . .    SCMemProj
1903   //           \   |
1904   //      |    MergeMem
1905   //      |       /
1906   //    MemBarCPUOrder
1907   //    MemBarAcquire {trailing}
1908   //

















1909   // if the correct configuration is present returns the trailing
1910   // membar otherwise NULL.
1911   //
1912   // the input membar is expected to be either a cpuorder membar or a
1913   // release membar. in the latter case it should not have a cpuorder membar
1914   // child.
1915   //
1916   // the returned value may be a card mark or trailing membar
1917   //
1918 
1919   MemBarNode *leading_to_normal(MemBarNode *leading)
1920   {
1921     assert((leading->Opcode() == Op_MemBarRelease ||
1922             leading->Opcode() == Op_MemBarCPUOrder),
1923            "expecting a release or cpuorder membar!");
1924 
1925     // check the mem flow
1926     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1927 
1928     if (!mem) {
1929       return NULL;
1930     }
1931 
1932     Node *x = NULL;
1933     StoreNode * st = NULL;
1934     LoadStoreNode *cas = NULL;
1935     MergeMemNode *mm = NULL;

1936 
1937     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1938       x = mem->fast_out(i);
1939       if (x->is_MergeMem()) {
1940         if (mm != NULL) {


1941           return NULL;


1942         }
1943         // two merge mems is one too many
1944         mm = x->as_MergeMem();

1945       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1946         // two releasing stores/CAS nodes is one too many
1947         if (st != NULL || cas != NULL) {
1948           return NULL;
1949         }
1950         st = x->as_Store();
1951       } else if (is_CAS(x->Opcode())) {
1952         if (st != NULL || cas != NULL) {
1953           return NULL;
1954         }
1955         cas = x->as_LoadStore();
1956       }
1957     }
1958 
1959     // must have a store or a cas
1960     if (!st && !cas) {
1961       return NULL;
1962     }
1963 
1964     // must have a merge if we also have st
1965     if (st && !mm) {
1966       return NULL;
1967     }
1968 
1969     Node *y = NULL;
1970     if (cas) {

1971       // look for an SCMemProj
1972       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
1973         x = cas->fast_out(i);
1974         if (x->is_Proj()) {
1975           y = x;
1976           break;
1977         }
1978       }
1979       if (y == NULL) {
1980         return NULL;
1981       }
1982       // the proj must feed a MergeMem
1983       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
1984         x = y->fast_out(i);
1985         if (x->is_MergeMem()) {
1986           mm = x->as_MergeMem();
1987           break;
1988         }
1989       }
1990       if (mm == NULL)
1991         return NULL;


















1992     } else {
1993       // ensure the store feeds the existing mergemem;

1994       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1995         if (st->fast_out(i) == mm) {
1996           y = st;
1997           break;
1998         }
1999       }
2000       if (y == NULL) {
2001         return NULL;
2002       }











2003     }
2004 
2005     MemBarNode *mbar = NULL;
2006     // ensure the merge feeds to the expected type of membar
2007     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2008       x = mm->fast_out(i);
2009       if (x->is_MemBar()) {
2010         int opcode = x->Opcode();
2011         if (opcode == Op_MemBarVolatile && st) {
2012           mbar = x->as_MemBar();
2013         } else if (cas && opcode == Op_MemBarCPUOrder) {
2014           MemBarNode *y =  x->as_MemBar();
2015           y = child_membar(y);
2016           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2017             mbar = y;
2018           }















2019         }
2020         break;
2021       }
2022     }
2023 





2024     return mbar;








2025   }
2026 
2027   // normal_to_leading
2028   //
2029   // graph traversal helper which detects the normal case Mem feed
2030   // from either a card mark or a trailing membar to a preceding
2031   // release membar (optionally its cpuorder child) i.e. it ensures
2032   // that one or other of the following Mem flow subgraphs is present.
2033   //
2034   //   MemBarRelease
2035   //   MemBarCPUOrder {leading}
2036   //          |  \      . . .
2037   //          |  StoreN/P[mo_release]  . . .
2038   //          |   /
2039   //         MergeMem
2040   //          |
2041   //   MemBarVolatile {card mark or trailing}
2042   //
2043   //   MemBarRelease
2044   //   MemBarCPUOrder {leading}
2045   //      |       \      . . .
2046   //      |     CompareAndSwapX  . . .
2047   //               |
2048   //     . . .    SCMemProj
2049   //           \   |
2050   //      |    MergeMem
2051   //      |        /
2052   //    MemBarCPUOrder
2053   //    MemBarAcquire {trailing}
2054   //
2055   // this predicate checks for the same flow as the previous predicate
2056   // but starting from the bottom rather than the top.
2057   //
2058   // if the configuration is present returns the cpuorder membar for
2059   // preference or when absent the release membar otherwise NULL.
2060   //
2061   // n.b. the input membar is expected to be a MemBarVolatile but
2062   // need not be a card mark membar.

2063 
2064   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065   {
2066     // input must be a volatile membar
2067     assert((barrier->Opcode() == Op_MemBarVolatile ||
2068             barrier->Opcode() == Op_MemBarAcquire),
2069            "expecting a volatile or an acquire membar");




2070     Node *x;
2071     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072 
2073     // if we have an acquire membar then it must be fed via a CPUOrder
2074     // membar
2075 
2076     if (is_cas) {
2077       // skip to parent barrier which must be a cpuorder
2078       x = parent_membar(barrier);
2079       if (x->Opcode() != Op_MemBarCPUOrder)
2080         return NULL;
2081     } else {
2082       // start from the supplied barrier
2083       x = (Node *)barrier;
2084     }
2085 
2086     // the Mem feed to the membar should be a merge
2087     x = x ->in(TypeFunc::Memory);
2088     if (!x->is_MergeMem())
2089       return NULL;


2162     if (st == NULL) {
2163       // nothing more to check
2164       return leading;
2165     } else {
2166       // we should not have a store if we started from an acquire
2167       if (is_cas) {
2168         return NULL;
2169       }
2170 
2171       // the store should feed the merge we used to get here
2172       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173         if (st->fast_out(i) == mm) {
2174           return leading;
2175         }
2176       }
2177     }
2178 
2179     return NULL;
2180   }
2181 
2182   // card_mark_to_trailing
2183   //
2184   // graph traversal helper which detects extra, non-normal Mem feed
2185   // from a card mark volatile membar to a trailing membar i.e. it
2186   // ensures that one of the following three GC post-write Mem flow
2187   // subgraphs is present.
2188   //
2189   // 1)
2190   //     . . .
2191   //       |
2192   //   MemBarVolatile (card mark)
2193   //      |          |
2194   //      |        StoreCM
2195   //      |          |
2196   //      |        . . .
2197   //  Bot |  /
2198   //   MergeMem
2199   //      |
2200   //      |
2201   //    MemBarVolatile {trailing}
2202   //
2203   // 2)
2204   //   MemBarRelease/CPUOrder (leading)
2205   //    |
2206   //    |
2207   //    |\       . . .
2208   //    | \        |
2209   //    |  \  MemBarVolatile (card mark)
2210   //    |   \   |     |
2211   //     \   \  |   StoreCM    . . .
2212   //      \   \ |
2213   //       \  Phi
2214   //        \ /
2215   //        Phi  . . .
2216   //     Bot |   /
2217   //       MergeMem
2218   //         |
2219   //    MemBarVolatile {trailing}
2220   //
2221   //
2222   // 3)
2223   //   MemBarRelease/CPUOrder (leading)
2224   //    |
2225   //    |\
2226   //    | \
2227   //    |  \      . . .
2228   //    |   \       |
2229   //    |\   \  MemBarVolatile (card mark)
2230   //    | \   \   |     |
2231   //    |  \   \  |   StoreCM    . . .
2232   //    |   \   \ |
2233   //     \   \  Phi
2234   //      \   \ /
2235   //       \  Phi
2236   //        \ /
2237   //        Phi  . . .
2238   //     Bot |   /
2239   //       MergeMem
2240   //         |
2241   //         |
2242   //    MemBarVolatile {trailing}
2243   //
2244   // configuration 1 is only valid if UseConcMarkSweepGC &&
2245   // UseCondCardMark
2246   //
2247   // configurations 2 and 3 are only valid if UseG1GC.
2248   //
2249   // if a valid configuration is present returns the trailing membar
2250   // otherwise NULL.
2251   //
2252   // n.b. the supplied membar is expected to be a card mark
2253   // MemBarVolatile i.e. the caller must ensure the input node has the
2254   // correct opcode and feeds Mem to a StoreCM node
2255 
2256   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257   {
2258     // input must be a card mark volatile membar
2259     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260 
2261     Node *feed = barrier->proj_out(TypeFunc::Memory);
2262     Node *x;
2263     MergeMemNode *mm = NULL;
2264 
2265     const int MAX_PHIS = 3;     // max phis we will search through
2266     int phicount = 0;           // current search count
2267 
2268     bool retry_feed = true;
2269     while (retry_feed) {
2270       // see if we have a direct MergeMem feed
2271       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2272         x = feed->fast_out(i);
2273         // the correct Phi will be merging a Bot memory slice
2274         if (x->is_MergeMem()) {
2275           mm = x->as_MergeMem();
2276           break;
2277         }
2278       }
2279       if (mm) {
2280         retry_feed = false;
2281       } else if (UseG1GC && phicount++ < MAX_PHIS) {
2282         // the barrier may feed indirectly via one or two Phi nodes
2283         PhiNode *phi = NULL;
2284         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285           x = feed->fast_out(i);
2286           // the correct Phi will be merging a Bot memory slice
2287           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288             phi = x->as_Phi();
2289             break;
2290           }
2291         }
2292         if (!phi) {
2293           return NULL;
2294         }
2295         // look for another merge below this phi
2296         feed = phi;
2297       } else {
2298         // couldn't find a merge
2299         return NULL;
2300       }
2301     }
2302 
2303     // sanity check this feed turns up as the expected slice
2304     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305 
2306     MemBarNode *trailing = NULL;
2307     // be sure the merge feeds a trailing membar
2308     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309       x = mm->fast_out(i);
2310       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311         trailing = x->as_MemBar();
2312         break;
2313       }
2314     }
2315 
2316     return trailing;
2317   }
2318 
2319   // trailing_to_card_mark
2320   //
2321   // graph traversal helper which detects extra, non-normal Mem feed
2322   // from a trailing volatile membar to a preceding card mark volatile
2323   // membar i.e. it identifies whether one of the three possible extra
2324   // GC post-write Mem flow subgraphs is present
2325   //
2326   // this predicate checks for the same flow as the previous predicate
2327   // but starting from the bottom rather than the top.
2328   //
2329   // if the configuration is present returns the card mark membar
2330   // otherwise NULL
2331   //
2332   // n.b. the supplied membar is expected to be a trailing
2333   // MemBarVolatile i.e. the caller must ensure the input node has the
2334   // correct opcode
2335 
2336   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2337   {
2338     assert(trailing->Opcode() == Op_MemBarVolatile,
2339            "expecting a volatile membar");
2340     assert(!is_card_mark_membar(trailing),
2341            "not expecting a card mark membar");
2342 
2343     // the Mem feed to the membar should be a merge
2344     Node *x = trailing->in(TypeFunc::Memory);
2345     if (!x->is_MergeMem()) {
2346       return NULL;
2347     }
2348 
2349     MergeMemNode *mm = x->as_MergeMem();
2350 
2351     x = mm->in(Compile::AliasIdxBot);
2352     // with G1 we may possibly see a Phi or two before we see a Memory
2353     // Proj from the card mark membar
2354 
2355     const int MAX_PHIS = 3;     // max phis we will search through
2356     int phicount = 0;           // current search count
2357 
2358     bool retry_feed = !x->is_Proj();
2359 
2360     while (retry_feed) {
2361       if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2362         PhiNode *phi = x->as_Phi();
2363         ProjNode *proj = NULL;
2364         PhiNode *nextphi = NULL;
2365         bool found_leading = false;
2366         for (uint i = 1; i < phi->req(); i++) {
2367           x = phi->in(i);
2368           if (x->is_Phi()) {
2369             nextphi = x->as_Phi();
2370           } else if (x->is_Proj()) {
2371             int opcode = x->in(0)->Opcode();
2372             if (opcode == Op_MemBarVolatile) {
2373               proj = x->as_Proj();
2374             } else if (opcode == Op_MemBarRelease ||
2375                        opcode == Op_MemBarCPUOrder) {
2376               // probably a leading membar
2377               found_leading = true;
2378             }
2379           }
2380         }
2381         // if we found a correct looking proj then retry from there
2382         // otherwise we must see a leading membar and a phi or this is
2383         // the wrong config
2384         if (proj != NULL) {
2385           x = proj;
2386           retry_feed = false;
2387         } else if (found_leading && nextphi != NULL) {
2388           // retry from this phi to check phi2
2389           x = nextphi;
2390         } else {
2391           // not what we were looking for
2392           return NULL;
2393         }
2394       } else {
2395         return NULL;
2396       }
2397     }
2398     // the proj has to come from the card mark membar
2399     x = x->in(0);
2400     if (!x->is_MemBar()) {
2401       return NULL;
2402     }
2403 
2404     MemBarNode *card_mark_membar = x->as_MemBar();
2405 
2406     if (!is_card_mark_membar(card_mark_membar)) {
2407       return NULL;
2408     }
2409 
2410     return card_mark_membar;
2411   }
2412 
2413   // trailing_to_leading
2414   //
2415   // graph traversal helper which checks the Mem flow up the graph
2416   // from a (non-card mark) trailing membar attempting to locate and
2417   // return an associated leading membar. it first looks for a
2418   // subgraph in the normal configuration (relying on helper
2419   // normal_to_leading). failing that it then looks for one of the
2420   // possible post-write card mark subgraphs linking the trailing node
2421   // to the card mark membar (relying on helper
2422   // trailing_to_card_mark), and then checks that the card mark membar
2423   // is fed by a leading membar (once again relying on auxiliary
2424   // predicate normal_to_leading).
2425   //
2426   // if the configuration is valid returns the cpuorder membar for
2427   // preference or when absent the release membar otherwise NULL.
2428   //
2429   // n.b. the input membar is expected to be either a volatile or
2430   // acquire membar but in the former case must *not* be a card mark
2431   // membar.
2432 
2433   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434   {
2435     assert((trailing->Opcode() == Op_MemBarAcquire ||
2436             trailing->Opcode() == Op_MemBarVolatile),
2437            "expecting an acquire or volatile membar");
2438     assert((trailing->Opcode() != Op_MemBarVolatile ||
2439             !is_card_mark_membar(trailing)),
2440            "not expecting a card mark membar");
2441 
2442     MemBarNode *leading = normal_to_leading(trailing);
2443 
2444     if (leading) {
2445       return leading;
2446     }
2447 
2448     // nothing more to do if this is an acquire
2449     if (trailing->Opcode() == Op_MemBarAcquire) {
2450       return NULL;
2451     }
2452 
2453     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454 
2455     if (!card_mark_membar) {
2456       return NULL;
2457     }
2458 
2459     return normal_to_leading(card_mark_membar);
2460   }
2461 
2462   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463 
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466   assert(barrier->is_MemBar(), "expecting a membar");
2467 
2468   if (UseBarriersForVolatile) {
2469     // we need to plant a dmb
2470     return false;
2471   }
2472 
2473   // a volatile read derived from bytecode (or also from an inlined
2474   // SHA field read via LibraryCallKit::load_field_from_object)
2475   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2476   // with a bogus read dependency on its preceding load. so in those
2477   // cases we will find the load node at the PARMS offset of the
2478   // acquire membar.  n.b. there may be an intervening DecodeN node.
2479   //
2480   // a volatile load derived from an inlined unsafe field access
2481   // manifests as a cpuorder membar with Ctl and Mem projections
2482   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483   // acquire then feeds another cpuorder membar via Ctl and Mem


2658           n->Opcode() == Op_MemBarRelease),
2659          "expecting a release membar");
2660 
2661   if (UseBarriersForVolatile) {
2662     // we need to plant a dmb
2663     return false;
2664   }
2665 
2666   // if there is a dependent CPUOrder barrier then use that as the
2667   // leading
2668 
2669   MemBarNode *barrier = n->as_MemBar();
2670   // check for an intervening cpuorder membar
2671   MemBarNode *b = child_membar(barrier);
2672   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673     // ok, so start the check from the dependent cpuorder barrier
2674     barrier = b;
2675   }
2676 
2677   // must start with a normal feed
2678   MemBarNode *child_barrier = leading_to_normal(barrier);
2679 
2680   if (!child_barrier) {
2681     return false;
2682   }
2683 
2684   if (!is_card_mark_membar(child_barrier)) {
2685     // this is the trailing membar and we are done
2686     return true;
2687   }
2688 
2689   // must be sure this card mark feeds a trailing membar
2690   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691   return (trailing != NULL);
2692 }
2693 
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696   // assert n->is_MemBar();
2697   if (UseBarriersForVolatile) {
2698     // we need to plant a dmb
2699     return false;
2700   }
2701 
2702   MemBarNode *mbvol = n->as_MemBar();
2703 
2704   // first we check if this is part of a card mark. if so then we have
2705   // to generate a StoreLoad barrier
2706 
2707   if (is_card_mark_membar(mbvol)) {
2708       return false;
2709   }
2710 
2711   // ok, if it's not a card mark then we still need to check if it is
2712   // a trailing membar of a volatile put graph.
2713 
2714   return (trailing_to_leading(mbvol) != NULL);
2715 }
2716 
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718 
2719 bool needs_releasing_store(const Node *n)
2720 {
2721   // assert n->is_Store();
2722   if (UseBarriersForVolatile) {
2723     // we use a normal store and dmb combination
2724     return false;
2725   }
2726 
2727   StoreNode *st = n->as_Store();
2728 
2729   // the store must be marked as releasing
2730   if (!st->is_release()) {
2731     return false;
2732   }


2742   ProjNode *proj = x->as_Proj();
2743 
2744   x = proj->lookup(0);
2745 
2746   if (!x || !x->is_MemBar()) {
2747     return false;
2748   }
2749 
2750   MemBarNode *barrier = x->as_MemBar();
2751 
2752   // if the barrier is a release membar or a cpuorder membar fed by a
2753   // release membar then we need to check whether that forms part of a
2754   // volatile put graph.
2755 
2756   // reject invalid candidates
2757   if (!leading_membar(barrier)) {
2758     return false;
2759   }
2760 
2761   // does this lead a normal subgraph?
2762   MemBarNode *mbvol = leading_to_normal(barrier);
2763 
2764   if (!mbvol) {
2765     return false;
2766   }
2767 
2768   // all done unless this is a card mark
2769   if (!is_card_mark_membar(mbvol)) {
2770     return true;
2771   }
2772 
2773   // we found a card mark -- just make sure we have a trailing barrier
2774 
2775   return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777 
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781 
2782 bool needs_acquiring_load_exclusive(const Node *n)
2783 {
2784   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2785   if (UseBarriersForVolatile) {
2786     return false;
2787   }
2788 
2789   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2790 #ifdef ASSERT
2791   LoadStoreNode *st = n->as_LoadStore();
2792 
2793   // the store must be fed by a membar
2794 
2795   Node *x = st->lookup(StoreNode::Memory);


2797   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2798 
2799   ProjNode *proj = x->as_Proj();
2800 
2801   x = proj->lookup(0);
2802 
2803   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2804 
2805   MemBarNode *barrier = x->as_MemBar();
2806 
2807   // the barrier must be a cpuorder membar fed by a release membar
2808 
2809   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2810          "CAS not fed by cpuorder membar!");
2811 
2812   MemBarNode *b = parent_membar(barrier);
2813   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2814           "CAS not fed by cpuorder+release membar pair!");
2815 
2816   // does this lead a normal subgraph?
2817   MemBarNode *mbar = leading_to_normal(barrier);
2818 
2819   assert(mbar != NULL, "CAS not embedded in normal graph!");
2820 
2821   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2822 #endif // ASSERT
2823   // so we can just return true here
2824   return true;
2825 }
2826 
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if the StoreStore barrier normally planted before the
2830 // card write can be omitted otherwise false
2831 
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2835 
2836   // we only ever need to generate a dmb ishst between an object put
2837   // and the associated card mark when we are using CMS without
2838   // conditional card marking




2839 
2840   if (!UseConcMarkSweepGC || UseCondCardMark) {
2841     return true;
2842   }
2843 
2844   // if we are implementing volatile puts using barriers then the
2845   // object put is implemented as an str so we must insert the dmb ishst
2846 
2847   if (UseBarriersForVolatile) {
2848     return false;
2849   }
2850 
2851   // we can omit the dmb ishst if this StoreCM is part of a volatile
2852   // put because in that case the put will be implemented by stlr
2853   //
2854   // we need to check for a normal subgraph feeding this StoreCM.
2855   // that means the StoreCM must be fed Memory from a leading membar,
2856   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857   // leading membar must be part of a normal subgraph
2858 
2859   Node *x = storecm->in(StoreNode::Memory);
2860 
2861   if (!x->is_Proj()) {
2862     return false;
2863   }
2864 
2865   x = x->in(0);
2866 
2867   if (!x->is_MemBar()) {
2868     return false;
2869   }
2870 
2871   MemBarNode *leading = x->as_MemBar();
2872 
2873   // reject invalid candidates
2874   if (!leading_membar(leading)) {
2875     return false;
2876   }
2877 
2878   // we can omit the StoreStore if it is the head of a normal subgraph
2879   return (leading_to_normal(leading) != NULL);
2880 }
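
// As a usage illustration (the rule below is a sketch, not necessarily
// the exact rule in this file), the StoreCM match rules can select a
// barrier-free encoding via this predicate:
//
//   instruct storeCM_no_dmb(immI0 zero, memory mem)
//   %{
//     match(Set mem (StoreCM mem zero));
//     predicate(unnecessary_storestore(n));
//     // ... emit just the card write, with no preceding dmb ishst ...
//   %}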
2881 
2882 
2883 #define __ _masm.
2884 
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887 
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892 
2893 //=============================================================================
2894 
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 //       from the start of the call to the point where the return address
2897 //       will point.
2898 
2899 int MachCallStaticJavaNode::ret_addr_offset()




1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036 
1037   MemBarNode *parent_membar(const Node *n);
1038   MemBarNode *child_membar(const MemBarNode *n);
1039   bool leading_membar(const MemBarNode *barrier);
1040 
1041   bool is_card_mark_membar(const MemBarNode *barrier);
1042   bool is_CAS(int opcode);
1043 
1044   MemBarNode *leading_to_trailing(MemBarNode *leading);
1045   MemBarNode *card_mark_to_leading(const MemBarNode *barrier);


1046   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1047 
1048   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1049 
1050   bool unnecessary_acquire(const Node *barrier);
1051   bool needs_acquiring_load(const Node *load);
1052 
1053   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1054 
1055   bool unnecessary_release(const Node *barrier);
1056   bool unnecessary_volatile(const Node *barrier);
1057   bool needs_releasing_store(const Node *store);
1058 
1059   // predicate controlling translation of CompareAndSwapX
1060   bool needs_acquiring_load_exclusive(const Node *load);
1061 
1062   // predicate controlling translation of StoreCM
1063   bool unnecessary_storestore(const Node *storecm);
1064 %}
1065 


1403 
1404     return false;
1405   }
1406 
1407 
1408   // 3) helper predicates to traverse volatile put or CAS graphs which
1409   // may contain GC barrier subgraphs
1410 
1411   // Preamble
1412   // --------
1413   //
1414   // for volatile writes we can omit generating barriers and employ a
1415   // releasing store when we see a node sequence with a
1416   // leading MemBarRelease and a trailing MemBarVolatile as follows
1417   //
1418   //   MemBarRelease
1419   //  {    ||        } -- optional
1420   //  {MemBarCPUOrder}
1421   //       ||       \\
1422   //       ||     StoreX[mo_release]
1423   //       | \ Bot    / ???
1424   //       | MergeMem
1425   //       | /
1426   //   MemBarVolatile
1427   //
1428   // where
1429   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1430   //  | \ and / indicate further routing of the Ctl and Mem feeds
1431   //
1432   // Note that the memory feed from the CPUOrder membar to the
1433   // MergeMem node is an AliasIdxBot slice while the feed from the
1434   // StoreX is for a slice determined by the type of value being
1435   // written.
1436   //
1437   // the diagram above shows the graph we see for non-object stores.
1438   // for a volatile Object store (StoreN/P) we may see other nodes
1439   // below the leading membar because of the need for a GC pre- or
1440   // post-write barrier.
1441   //
1442   // with most GC configurations we will see this simple variant which
1443   // includes a post-write barrier card mark.
1444   //
1445   //   MemBarRelease______________________________
1446   //         ||    \\               Ctl \        \\
1447   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1448   //         | \ Bot  / oop                 . . .  /
1449   //         | MergeMem
1450   //         | /
1451   //         ||      /
1452   //   MemBarVolatile
1453   //
1454   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1455   // the object address to an int used to compute the card offset) and
1456   // Ctl+Mem to a StoreB node (which does the actual card mark).
1457   //
1458   // n.b. a StoreCM node is only ever used when CMS (with or without
1459   // CondCardMark) or G1 is configured. This abstract instruction
1460   // differs from a normal card mark write (StoreB) because it implies
1461   // a requirement to order visibility of the card mark (StoreCM)
1462   // after that of the object put (StoreP/N) using a StoreStore memory
1463   // barrier. Note that this is /not/ a requirement to order the
1464   // instructions in the generated code (that is already guaranteed by
1465   // the order of memory dependencies). Rather it is a requirement to
1466   // ensure visibility order which only applies on architectures like
1467   // AArch64 which do not implement TSO. This ordering is required for
1468   // both non-volatile and volatile puts.
1469   //
1470   // That implies that we need to translate a StoreCM using the
1471   // sequence
1472   //
1473   //   dmb ishst
1474   //   stlrb
1475   //
1476   // This dmb cannot be omitted even when the associated StoreX or
1477   // CompareAndSwapX is implemented using stlr. However, as described
1478   // below there are circumstances where a specific GC configuration
1479   // requires a stronger barrier in which case it can be omitted.
1480   // 
1481   // With the Serial or Parallel GC using +CondCardMark the card mark
1482   // is performed conditionally on it currently being unmarked in
1483   // which case the volatile put graph looks slightly different




1484   //
1485   //   MemBarRelease____________________________________________
1486   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1487   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1488   //         | \ Bot / oop                          \            |
1489   //         | MergeMem                            . . .      StoreB
1490   //         | /                                                /
1491   //         ||     /
1492   //   MemBarVolatile
1493   //
1494   // It is worth noting at this stage that all the above
1495   // configurations can be uniquely identified by checking that the
1496   // memory flow includes the following subgraph:
1497   //
1498   //   MemBarRelease
1499   //  {MemBarCPUOrder}
1500   //      |  \      . . .
1501   //      |  StoreX[mo_release]  . . .
1502   //  Bot |   / oop
1503   //     MergeMem
1504   //      |
1505   //   MemBarVolatile
1506   //
1507   // This is referred to as a *normal* volatile store subgraph. It can
1508   // easily be detected starting from any candidate MemBarRelease,
1509   // StoreX[mo_release] or MemBarVolatile node.
1510   //
1511   // A small variation on this normal case occurs for an unsafe CAS
1512   // operation. The basic memory flow subgraph for a non-object CAS is
1513   // as follows
1514   //
1515   //   MemBarRelease
1516   //         ||
1517   //   MemBarCPUOrder
1518   //          |     \\   . . .
1519   //          |     CompareAndSwapX
1520   //          |       |
1521   //      Bot |     SCMemProj
1522   //           \     / Bot
1523   //           MergeMem
1524   //           /
1525   //   MemBarCPUOrder
1526   //         ||
1527   //   MemBarAcquire
1528   //
1529   // The same basic variations on this arrangement (mutatis mutandis)
1530   // occur when a card mark is introduced, i.e. the CPUOrder MemBar
1531   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1532   // flow subgraph is still present.
1533   // 
1534   // This is referred to as a *normal* CAS subgraph. It can easily be
1535   // detected starting from any candidate MemBarRelease,
1536   // CompareAndSwapX or MemBarAcquire node.
1537   //
1538   // The code below uses two helper predicates, leading_to_trailing
1539   // and trailing_to_leading, to identify these normal graphs: one
1540   // validates the layout starting from the top membar and searching
1541   // down, the other validates the layout starting from the lower
1542   // membar and searching up.
1543   //
1544   // There are two special case GC configurations when the simple
1545   // normal graphs above may not be generated: when using G1 (which
1546   // always employs a conditional card mark); and when using CMS with
1547   // conditional card marking (+CondCardMark) configured. These GCs
1548   // are both concurrent rather than stop-the world GCs. So they
1549   // are both concurrent rather than stop-the-world GCs. So they
1550   // and trailing membar nodes, in particular enforcing stronger
1551   // memory serialisation beween the object put and the corresponding
1552   // memory serialisation between the object put and the corresponding
1553   // G1 employs both a pre- and post-write GC barrier.
1554   //
1555   // The post-write barrier subgraph for these configurations includes
1556   // a MemBarVolatile node -- referred to as a card mark membar --
1557   // which is needed to order the card write (StoreCM) operation in
1558   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1559   // operations performed by GC threads, i.e. a card mark membar
1560   // constitutes a StoreLoad barrier and hence must be translated to a
1561   // dmb ish (whether or not it sits inside a volatile store sequence).
1562   //
1563   // Of course, the use of the dmb ish for the card mark membar also
1564   // implies that the StoreCM which follows can omit the dmb ishst
1565   // instruction. The necessary visibility ordering will already be
1566   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
1567   // needs to be generated as part of the StoreCM sequence with GC
1568   // configuration +CMS -CondCardMark.
1569   // 
1570   // Of course all these extra barrier nodes may well be absent --
1571   // they are only inserted for object puts. Their potential presence
1572   // significantly complicates the task of identifying whether a
1573   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1574   // MemBarAcquire forms part of a volatile put or CAS when using
1575   // these GC configurations (see below) and also complicates the
1576   // decision as to how to translate a MemBarVolatile and StoreCM.
1577   //
1578   // So, this means that a card mark MemBarVolatile occurring in the
1579   // post-barrier graph needs to be distinguished from a normal
1580   // trailing MemBarVolatile. Resolving this is straightforward: a
1581   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1582   // node and that is a unique marker
1583   //
1584   //      MemBarVolatile (card mark)
1585   //       C |    \     . . .
1586   //         |   StoreCM   . . .
1587   //       . . .
1588   //
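       //
       // In code terms the check this implies can be sketched as follows
       // (an editor's illustration only -- the real is_card_mark_membar
       // helper is defined earlier in this file and may differ in detail,
       // for example by also checking the GC configuration):
       //
       //   bool looks_like_card_mark_membar(const MemBarNode *barrier)
       //   {
       //     if (barrier->Opcode() != Op_MemBarVolatile) {
       //       return false;
       //     }
       //     // a card mark membar projects a Mem feed to a StoreCM
       //     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
       //     if (mem == NULL) {
       //       return false;
       //     }
       //     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       //       if (mem->fast_out(i)->Opcode() == Op_StoreCM) {
       //         return true;
       //       }
       //     }
       //     return false;
       //   }
       //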
1589   // Returning to the task of translating the object put and the
1590   // leading/trailing membar nodes: what do the node graphs look like
1591   // for these 2 special cases? and how can we determine the status of
1592   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1593   // normal and non-normal cases?
1594   //
1595   // With +CondCardMark the CMS GC post-barrier wraps its card write
1596   // (StoreCM) inside an If which selects conditional execution based
1597   // on the value loaded (LoadB) from the card. Ctl and Mem are fed to
1598   // the If via an intervening StoreLoad barrier (MemBarVolatile).
1599   //
1600   // So, with CMS + CondCardMark we may see a node graph for a
1601   // volatile object store which looks like this
1602   //
1603   //   MemBarRelease
1604   //   MemBarCPUOrder_(leading)____________________
1605   //     C |  | M \       \\               M |   C \
1606   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1607   //       |  | Bot \    / oop      \        |
1608   //       |  |    MergeMem          \      / 
1609   //       |  |      /                |    /
1610   //     MemBarVolatile (card mark)   |   /
1611   //     C |  ||    M |               |  /
1612   //       | LoadB    | Bot       oop | / Bot
1613   //       |   |      |              / /
1614   //       | Cmp      |\            / /
1615   //       | /        | \          / /
1616   //       If         |  \        / /
1617   //       | \        |   \      / /
1618   // IfFalse  IfTrue  |    \    / /
1619   //       \     / \  |    |   / /
1620   //        \   / StoreCM  |  / /
1621   //         \ /      \   /  / /
1622   //        Region     Phi  / /
1623   //          | \   Raw |  / /
1624   //          |  . . .  | / /
1625   //          |       MergeMem
1626   //          |           |
1627   //        MemBarVolatile (trailing)
1628   //
1629   // Notice that there are two MergeMem nodes below the leading
1630   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1631   // the leading membar and the oopptr Mem slice from the Store into
1632   // the card mark membar. The trailing MergeMem merges the
1633   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1634   // slice from the StoreCM and an oop slice from the StoreN/P node
1635   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1636   // associated with the If region).
1637   //
1638   // So, in the case of CMS + CondCardMark the volatile object store
1639   // graph still includes a normal volatile store subgraph from the
1640   // leading membar to the trailing membar. However, it also contains
1641   // the same shape memory flow to the card mark membar. The two flows
1642   // can be distinguished by testing whether or not the downstream
1643   // membar is a card mark membar.
1644   //
1645   // The graph for a CAS also varies with CMS + CondCardMark, in
1646   // particular employing a control feed from the CompareAndSwapX node
1647   // through a CmpI and If to the card mark membar and StoreCM which
1648   // updates the associated card. This avoids executing the card mark
1649   // if the CAS fails. However, it can be seen from the diagram below
1650   // that the presence of the barrier does not alter the normal CAS
1651   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1652   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1653   // MemBarAcquire pair.
1654   //
1655   //   MemBarRelease
1656   //   MemBarCPUOrder__(leading)_______________________
1657   //   C /  M |                        \\            C \
1658   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1659   //          |                  C /  M |
1660   //          |                 CmpI    |
1661   //          |                  /      |
1662   //          |               . . .     |
1663   //          |              IfTrue     |
1664   //          |              /          |
1665   //       MemBarVolatile (card mark)   |
1666   //        C |  ||    M |              |
1667   //          | LoadB    | Bot   ______/|
1668   //          |   |      |      /       |
1669   //          | Cmp      |     /      SCMemProj
1670   //          | /        |    /         |
1671   //          If         |   /         /
1672   //          | \        |  /         / Bot
1673   //     IfFalse  IfTrue | /         /
1674   //          |   / \   / / prec    /
1675   //   . . .  |  /  StoreCM        /
1676   //        \ | /      | raw      /
1677   //        Region    . . .      /
1678   //           | \              /
1679   //           |   . . .   \    / Bot
1680   //           |        MergeMem
1681   //           |          /
1682   //         MemBarCPUOrder
1683   //         MemBarAcquire (trailing)
1684   //
1685   // This has a slightly different memory subgraph to the one seen
1686   // previously but the core of it has a similar memory flow to the
1687   // CAS normal subgraph:
1688   //
1689   //   MemBarRelease
1690   //   MemBarCPUOrder____
1691   //         |          \      . . .
1692   //         |       CompareAndSwapX  . . .
1693   //         |       C /  M |
1694   //         |      CmpI    |
1695   //         |       /      |
1696   //         |      . .    /
1697   //     Bot |   IfTrue   /
1698   //         |   /       /
1699   //    MemBarVolatile  /
1700   //         | ...     /
1701   //      StoreCM ... /
1702   //         |       / 
1703   //       . . .  SCMemProj
1704   //      Raw \    / Bot
1705   //        MergeMem
1706   //           |
1707   //   MemBarCPUOrder
1708   //   MemBarAcquire
1709   //
1710   // The G1 graph for a volatile object put is a lot more complicated.
1711   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1712   // which adds the old value to the SATB queue; the releasing store
1713   // itself; and, finally, a post-write graph which performs a card
1714   // mark.
1715   //
1716   // The pre-write graph may be omitted, but only when the put is
1717   // writing to a newly allocated (young gen) object and then only if
1718   // there is a direct memory chain to the Initialize node for the
1719   // object allocation. This will not happen for a volatile put since
1720   // any memory chain passes through the leading membar.
1721   //
1722   // The pre-write graph includes a series of 3 If tests. The outermost
1723   // If tests whether SATB is enabled (no else case). The next If tests
1724   // whether the old value is non-NULL (no else case). The third tests
1725   // whether the SATB queue index is > 0, if so updating the queue. The
1726   // else case for this third If calls out to the runtime to allocate a
1727   // new queue buffer.
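       //
       // Expressed as pseudocode (an editor's sketch of the control flow
       // just described, with invented variable names), the pre-write
       // barrier amounts to
       //
       //   if (satb_is_active) {          // outermost If, no else
       //     pre_val = *field;            // the old value
       //     if (pre_val != NULL) {       // second If, no else
       //       if (queue_index > 0) {     // third If
       //         ... store pre_val into the SATB queue ...
       //       } else {
       //         ... call the runtime to allocate a new queue buffer ...
       //       }
       //     }
       //   }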
1728   //
1729   // So with G1 the pre-write and releasing store subgraph looks like
1730   // this (the nested Ifs are omitted).
1731   //
1732   //  MemBarRelease (leading)____________
1733   //     C |  ||  M \   M \    M \  M \ . . .
1734   //       | LoadB   \  LoadL  LoadN   \
1735   //       | /        \                 \
1736   //       If         |\                 \
1737   //       | \        | \                 \
1738   //  IfFalse  IfTrue |  \                 \
1739   //       |     |    |   \                 |
1740   //       |     If   |   /\                |
1741   //       |     |          \               |
1742   //       |                 \              |
1743   //       |    . . .         \             |
1744   //       | /       | /       |            |
1745   //      Region  Phi[M]       |            |
1746   //       | \       |         |            |
1747   //       |  \_____ | ___     |            |
1748   //     C | C \     |   C \ M |            |
1749   //       | CastP2X | StoreN/P[mo_release] |
1750   //       |         |         |            |
1751   //     C |       M |       M |          M |
1752   //        \        | Raw     | oop       / Bot
1753   //                  . . .
1754   //          (post write subtree elided)
1755   //                    . . .
1756   //             C \         M /
1757   //         MemBarVolatile (trailing)
1758   //
1759   // Note that the three memory feeds into the post-write tree are an
1760   // AliasIdxRaw slice associated with the writes in the pre-write
1761   // tree, an oop type slice from the StoreX specific to the type of
1762   // the volatile field and the AliasIdxBot slice emanating from the
1763   // leading membar.
1764   //
1765   // n.b. the LoadB in this subgraph is not the card read -- it's a
1766   // read of the SATB queue active flag.
1767   //
1768   // The CAS graph is once again a variant of the above with a
1769   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1770   // value from the CompareAndSwapX node is fed into the post-write
1771   // graph along with the AliasIdxRaw feed from the pre-barrier and
1772   // the AliasIdxBot feeds from the leading membar and the SCMemProj.
1773   //
1774   //  MemBarRelease (leading)____________
1775   //     C |  ||  M \   M \    M \  M \ . . .
1776   //       | LoadB   \  LoadL  LoadN   \
1777   //       | /        \                 \
1778   //       If         |\                 \
1779   //       | \        | \                 \
1780   //  IfFalse  IfTrue |  \                 \
1781   //       |     |    |   \                 \
1782   //       |     If   |    \                 |
1783   //       |     |          \                |
1784   //       |                 \               |
1785   //       |    . . .         \              |
1786   //       | /       | /       \             |
1787   //      Region  Phi[M]        \            |
1788   //       | \       |           \           |
1789   //       |  \_____ |            |          |
1790   //     C | C \     |            |          |
1791   //       | CastP2X |     CompareAndSwapX   |
1792   //       |         |   res |     |         |
1793   //     C |       M |       |  SCMemProj  M |
1794   //        \        | Raw   |     | Bot    / Bot
1795   //                  . . .
1796   //          (post write subtree elided)
1797   //                    . . .
1798   //             C \         M /
1799   //         MemBarVolatile (trailing)
1800   //
1801   // The G1 post-write subtree is also optional, this time when the
1802   // new value being written is either null or can be identified as a
1803   // newly allocated (young gen) object with no intervening control
1804   // flow. The latter cannot happen but the former may, in which case
1805   // the card mark membar is omitted and the memory feeds from the
1806   // leading membar and the StoreN/P are merged directly into the
1807   // trailing membar as per the normal subgraph. So, the only special
1808   // case which arises is when the post-write subgraph is generated.
1809   //
1810   // The kernel of the post-write G1 subgraph is the card mark itself
1811   // which includes a card mark memory barrier (MemBarVolatile), a
1812   // card test (LoadB), and a conditional update (If feeding a
1813   // StoreCM). These nodes are surrounded by a series of nested Ifs
1814   // which try to avoid doing the card mark. The top level If skips if
1815   // the object reference does not cross regions (i.e. it tests if
1816   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1817   // need not be recorded. The next If, which skips on a NULL value,
1818   // may be absent (it is not generated if the type of value is >=
1819   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1820   // checking if card_val != young).  n.b. although this test requires
1821   // a pre-read of the card it can safely be done before the StoreLoad
1822   // barrier. However that does not bypass the need to reread the card
1823   // after the barrier.
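       //
       // As pseudocode (again an editor's sketch of the tests just
       // described, with invented names) the post-write barrier is
       // roughly
       //
       //   if (((adr ^ val) >> log2(regsize)) != 0) {  // cross-region?
       //     if (val != NULL) {                        // may be absent
       //       if (*card_addr != young) {              // pre-read of the card
       //         StoreLoad;                            // the card mark membar
       //         if (*card_addr != dirty) {            // re-read after the barrier
       //           *card_addr = dirty;                 // the StoreCM
       //         }
       //       }
       //     }
       //   }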
1824   //
1825   //                (pre-write subtree elided)
1826   //        . . .                  . . .    . . .  . . .
1827   //        C |               M |    M |    M |
1828   //       Region            Phi[M] StoreN    |
1829   //          |            Raw  |  oop |  Bot |
1830   //         / \_______         |\     |\     |\
1831   //      C / C \      . . .    | \    | \    | \
1832   //       If   CastP2X . . .   |  \   |  \   |  \
1833   //       / \                  |   \  |   \  |   \
1834   //      /   \                 |    \ |    \ |    \
1835   // IfFalse IfTrue             |      |      |     \
1836   //   |       |                 \     |     /       |
1837   //   |       If                 \    | \  /   \    |
1838   //   |      / \                  \   |   /     \   |
1839   //   |     /   \                  \  |  / \     |  |
1840   //   | IfFalse IfTrue           MergeMem   \    |  |
1841   //   |  . . .    / \                 |      \   |  |
1842   //   |          /   \                |       |  |  |
1843   //   |     IfFalse IfTrue            |       |  |  |
1844   //   |      . . .    |               |       |  |  |
1845   //   |               If             /        |  |  |
1846   //   |               / \           /         |  |  |
1847   //   |              /   \         /          |  |  |
1848   //   |         IfFalse IfTrue    /           |  |  |
1849   //   |           . . .   |      /            |  |  |
1850   //   |                    \    /             |  |  |
1851   //   |                     \  /              |  |  |
1852   //   |         MemBarVolatile__(card mark  ) |  |  |
1853   //   |              ||   C |     \           |  |  |
1854   //   |             LoadB   If     |         /   |  |
1855   //   |                    / \ Raw |        /   /  /
1856   //   |                   . . .    |       /   /  /
1857   //   |                        \   |      /   /  /
1858   //   |                        StoreCM   /   /  /
1859   //   |                           |     /   /  /
1860   //   |                            . . .   /  /
1861   //   |                                   /  /
1862   //   |   . . .                          /  /
1863   //   |    |             | /            /  /
1864   //   |    |           Phi[M] /        /  /
1865   //   |    |             |   /        /  /
1866   //   |    |             |  /        /  /
1867   //   |  Region  . . .  Phi[M]      /  /
1868   //   |    |             |         /  /
1869   //    \   |             |        /  /
1870   //     \  | . . .       |       /  /
1871   //      \ |             |      /  /
1872   //      Region         Phi[M] /  /
1873   //        |               \  /  /
1874   //         \             MergeMem
1875   //          \            /
1876   //          MemBarVolatile
1877   //
1878   // As with CMS + CondCardMark the first MergeMem merges the
1879   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1880   // slice from the Store into the card mark membar. However, in this
1881   // case it may also merge an AliasIdxRaw Mem slice from the
1882   // pre-barrier write.
1883   //
1884   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1885   // leading membar with an oop slice from the StoreN and an
1886   // AliasIdxRaw slice from the post-barrier writes. In this case the
1887   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1888   // which combine feeds from the If regions in the post barrier
1889   // subgraph.
1890   //
1891   // So, for G1 the same characteristic subgraph arises as for CMS +
1892   // CondCardMark. There is a normal subgraph feeding the card mark
1893   // membar and a normal subgraph feeding the trailing membar.
1894   //
1895   // The CAS graph when using G1GC also includes an optional
1896   // post-write subgraph. It is very similar to the above graph except
1897   // for a few details.
1898   // 
1899   // - The control flow is gated by an additional If which tests the
1900   // result from the CompareAndSwapX node
1901   // 
1902   //  - The MergeMem which feeds the card mark membar only merges the
1903   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1904   // slice from the pre-barrier. It does not merge the SCMemProj
1905   // AliasIdxBot slice. So, this subgraph does not look like the
1906   // normal CAS subgraph.
1907   //
1908   // - The MergeMem which feeds the trailing membar merges the
1909   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1910   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1911   // has two AliasIdxBot input slices. However, this subgraph does
1912   // still look like the normal CAS subgraph.
1913   //
1914   // So, the upshot is:
1915   //
1916   // In all cases a volatile put graph will include a *normal*
1917   // volatile store subgraph between the leading membar and the
1918   // trailing membar. It may also include a normal volatile store
1919   // subgraph between the leading membar and the card mark membar.
1920   //
1921   // In all cases a CAS graph will contain a unique normal CAS graph
1922   // feeding the trailing membar.
1923   //
1924   // In all cases where there is a card mark membar (either as part of
1925   // a volatile object put or CAS) it will be fed by a MergeMem whose
1926   // AliasIdxBot slice feed will be a leading membar.
1927   //
1928   // The predicates controlling generation of instructions for store
1929   // and barrier nodes employ a few simple helper functions (described
1930   // below) which identify the presence or absence of all these
1931   // subgraph configurations and provide a means of traversing from
1932   // one node in the subgraph to another.
1933 
1934   // is_CAS(int opcode)
1935   //
1936   // return true if opcode is one of the possible CompareAndSwapX
1937   // values otherwise false.
1938 
1939   bool is_CAS(int opcode)
1940   {
1941     return (opcode == Op_CompareAndSwapI ||
1942             opcode == Op_CompareAndSwapL ||
1943             opcode == Op_CompareAndSwapN ||
1944             opcode == Op_CompareAndSwapP);
1945   }
1946 
1947   // leading_to_trailing
1948   //
1949   // graph traversal helper which detects the normal case Mem feed from
1950   // a release membar (or, optionally, its cpuorder child) to a
1951   // dependent volatile membar, i.e. it ensures that one or other of
1952   // the following Mem flow subgraphs is present.
1953   //
1954   //   MemBarRelease {leading}
1955   //   {MemBarCPUOrder} {optional}
1956   //     Bot |  \      . . .
1957   //         |  StoreN/P[mo_release]  . . .
1958   //         |   /
1959   //        MergeMem
1960   //         |
1961   //   MemBarVolatile {not card mark}
1962   //
1963   //   MemBarRelease {leading}
1964   //   {MemBarCPUOrder} {optional}
1965   //      |       \      . . .
1966   //      |     CompareAndSwapX  . . .
1967   //               |
1968   //     . . .    SCMemProj
1969   //           \   |
1970   //      |    MergeMem
1971   //      |       /
1972   //    MemBarCPUOrder
1973   //    MemBarAcquire {trailing}
1974   //
1975   // the predicate needs to be capable of distinguishing the following
1976   // volatile put graph which may arise when a GC post-barrier
1977   // inserts a card mark membar
1978   //
1979   //   MemBarRelease {leading}
1980   //   {MemBarCPUOrder}__
1981   //     Bot |   \       \
1982   //         |   StoreN/P \
1983   //         |    / \     |
1984   //        MergeMem \    |
1985   //         |        \   |
1986   //   MemBarVolatile  \  |
1987   //    {card mark}     \ |
1988   //                  MergeMem
1989   //                      |
1990   // {not card mark} MemBarVolatile
1991   //
1992   // if the correct configuration is present returns the trailing
1993   // membar otherwise NULL.
1994   //
1995   // the input membar is expected to be either a cpuorder membar or a
1996   // release membar. in the latter case it should not have a cpuorder
1997   // membar child.
1998   //
1999   // the returned value may be a card mark or trailing membar
2000   //
2001 
2002   MemBarNode *leading_to_trailing(MemBarNode *leading)
2003   {
2004     assert((leading->Opcode() == Op_MemBarRelease ||
2005             leading->Opcode() == Op_MemBarCPUOrder),
2006            "expecting a release or cpuorder membar!");
2007 
2008     // check the mem flow
2009     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2010 
2011     if (!mem) {
2012       return NULL;
2013     }
2014 
2015     Node *x = NULL;
2016     StoreNode * st = NULL;
2017     LoadStoreNode *cas = NULL;
2018     MergeMemNode *mm = NULL;
2019     MergeMemNode *mm2 = NULL;
2020 
2021     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2022       x = mem->fast_out(i);
2023       if (x->is_MergeMem()) {
2024         if (mm != NULL) {
2025           if (mm2 != NULL) {
2026             // should not see more than 2 merge mems
2027             return NULL;
2028           } else {
2029             mm2 = x->as_MergeMem();
2030           }
2031         } else {
2032           mm = x->as_MergeMem();
2033         }
2034       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2035         // two releasing stores/CAS nodes is one too many
2036         if (st != NULL || cas != NULL) {
2037           return NULL;
2038         }
2039         st = x->as_Store();
2040       } else if (is_CAS(x->Opcode())) {
2041         if (st != NULL || cas != NULL) {
2042           return NULL;
2043         }
2044         cas = x->as_LoadStore();
2045       }
2046     }
2047 
2048     // must have a store or a cas
2049     if (!st && !cas) {
2050       return NULL;
2051     }
2052 
2053     // must have at least one merge if we also have st
2054     if (st && !mm) {
2055       return NULL;
2056     }
2057 
2058     if (cas) {
2059       Node *y = NULL;
2060       // look for an SCMemProj
2061       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2062         x = cas->fast_out(i);
2063         if (x->is_Proj()) {
2064           y = x;
2065           break;
2066         }
2067       }
2068       if (y == NULL) {
2069         return NULL;
2070       }
2071       // the proj must feed a MergeMem
2072       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2073         x = y->fast_out(i);
2074         if (x->is_MergeMem()) {
2075           mm = x->as_MergeMem();
2076           break;
2077         }
2078       }
2079       if (mm == NULL) {
2080         return NULL;
2081       }
2082       MemBarNode *mbar = NULL;
2083       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2084       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2085         x = mm->fast_out(i);
2086         if (x->is_MemBar()) {
2087           int opcode = x->Opcode();
2088           if (opcode == Op_MemBarCPUOrder) {
2089             MemBarNode *z =  x->as_MemBar();
2090             z = child_membar(z);
2091             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2092               mbar = z;
2093             }
2094           }
2095           break;
2096         }
2097       }
2098       return mbar;
2099     } else {
2100       Node *y = NULL;
2101       // ensure the store feeds the first mergemem;
2102       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2103         if (st->fast_out(i) == mm) {
2104           y = st;
2105           break;
2106         }
2107       }
2108       if (y == NULL) {
2109         return NULL;
2110       }
2111       if (mm2 != NULL) {
2112         // ensure the store feeds the second mergemem;
2113         y = NULL;
2114         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2115           if (st->fast_out(i) == mm2) {
2116             y = st;
2117           }
2118         }
2119         if (y == NULL) {
2120           return NULL;
2121         }
2122       }
2123 
2124       MemBarNode *mbar = NULL;
2125       // ensure the first mergemem feeds a volatile membar
2126       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2127         x = mm->fast_out(i);
2128         if (x->is_MemBar()) {
2129           int opcode = x->Opcode();
2130           if (opcode == Op_MemBarVolatile) {
2131             mbar = x->as_MemBar();
2132           }
2133           break;
2134         }
2135       }
2136       if (mm2 == NULL) {
2137         // this is our only option for a trailing membar
2138         return mbar;
2139       }
2140       // ensure the second mergemem feeds a volatile membar
2141       MemBarNode *mbar2 = NULL;
2142       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2143         x = mm2->fast_out(i);
2144         if (x->is_MemBar()) {
2145           int opcode = x->Opcode();
2146           if (opcode == Op_MemBarVolatile) {
2147             mbar2 = x->as_MemBar();
2148           }
2149           break;
2150         }
2151       }
2152       // if we have two merge mems we must have two volatile membars
2153       if (mbar == NULL || mbar2 == NULL) {
2154         return NULL;
2155       }
2156       // return the trailing membar
2157       if (is_card_mark_membar(mbar2)) {
2158         return mbar;
2159       } else {
2160         if (is_card_mark_membar(mbar)) {
2161           return mbar2;
2162         } else {
2163           return NULL;
2164         }
2165       }
2166     }
2167   }
2168 
2169   // trailing_to_leading
2170   //
2171   // graph traversal helper which detects the normal case Mem feed
2172   // from a trailing membar to a preceding release membar (optionally
2173   // its cpuorder child) i.e. it ensures that one or other of the
2174   // following Mem flow subgraphs is present.
2175   //
2176   //   MemBarRelease {leading}
2177   //   MemBarCPUOrder {optional}
2178   //    | Bot |  \      . . .
2179   //    |     |  StoreN/P[mo_release]  . . .
2180   //    |     |   /
2181   //    |    MergeMem
2182   //    |     |
2183   //   MemBarVolatile {not card mark}
2184   //
2185   //   MemBarRelease {leading}
2186   //   MemBarCPUOrder {optional}
2187   //      |       \      . . .
2188   //      |     CompareAndSwapX  . . .
2189   //               |
2190   //     . . .    SCMemProj
2191   //           \   |
2192   //      |    MergeMem
2193   //      |       |
2194   //    MemBarCPUOrder
2195   //    MemBarAcquire {trailing}
2196   //
2197   // this predicate checks for the same flow as the previous predicate
2198   // but starting from the bottom rather than the top.
2199   //
2200   // if the configuration is present returns the cpuorder membar for
2201   // preference or when absent the release membar otherwise NULL.
2202   //
2203   // n.b. the input membar is expected to be a MemBarVolatile or
2204   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2205   // mark membar.
2206 
2207   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2208   {
2209     // input must be a volatile membar
2210     assert((barrier->Opcode() == Op_MemBarVolatile ||
2211             barrier->Opcode() == Op_MemBarAcquire),
2212            "expecting a volatile or an acquire membar");
2213 
2214     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2215            !is_card_mark_membar(barrier),
2216            "not expecting a card mark membar");
2217     Node *x;
2218     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2219 
2220     // if we have an acquire membar then it must be fed via a CPUOrder
2221     // membar
2222 
2223     if (is_cas) {
2224       // skip to parent barrier which must be a cpuorder
2225       x = parent_membar(barrier);
2226       if (x == NULL || x->Opcode() != Op_MemBarCPUOrder)
2227         return NULL;
2228     } else {
2229       // start from the supplied barrier
2230       x = (Node *)barrier;
2231     }
2232 
2233     // the Mem feed to the membar should be a merge
2234     x = x->in(TypeFunc::Memory);
2235     if (!x->is_MergeMem())
2236       return NULL;


2309     if (st == NULL) {
2310       // nothing more to check
2311       return leading;
2312     } else {
2313       // we should not have a store if we started from an acquire
2314       if (is_cas) {
2315         return NULL;
2316       }
2317 
2318       // the store should feed the merge we used to get here
2319       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2320         if (st->fast_out(i) == mm) {
2321           return leading;
2322         }
2323       }
2324     }
2325 
2326     return NULL;
2327   }
2328 
2329   // card_mark_to_leading
2330   //
2331   // graph traversal helper which traverses from a card mark volatile
2332   // membar to a leading membar i.e. it ensures that the following Mem
2333   // flow subgraph is present.
2334   //
2335   //    MemBarRelease {leading}
2336   //   {MemBarCPUOrder} {optional}
2337   //         |   . . .
2338   //     Bot |   /
2339   //      MergeMem
2340   //         |
2341   //     MemBarVolatile (card mark)
2342   //        |     \
2343   //      . . .   StoreCM
2344   //
2345   // if the configuration is present returns the cpuorder membar for
2346   // preference or when absent the release membar otherwise NULL.
2347   //
2348   // n.b. the input membar is expected to be a MemBarVolatile and must
2349   // be a card mark membar.
2350 
2351   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2352   {
2353     // input must be a card mark volatile membar
2354     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2355 
2356     // the Mem feed to the membar should be a merge
2357     Node *x = barrier->in(TypeFunc::Memory);
2358     if (!x->is_MergeMem()) {
2359       return NULL;
2360     }
2361 
2362     MergeMemNode *mm = x->as_MergeMem();
2363 
2364     x = mm->in(Compile::AliasIdxBot);
2365 
2366     if (!x->is_MemBar()) {
2367       return NULL;
2368     }
2369 
2370     MemBarNode *leading = x->as_MemBar();
2371 
2372     if (leading_membar(leading)) {
2373       return leading;
2374     }
2375 
2376     return NULL;
2377   }
2378 
2379 bool unnecessary_acquire(const Node *barrier)
2380 {
2381   assert(barrier->is_MemBar(), "expecting a membar");
2382 
2383   if (UseBarriersForVolatile) {
2384     // we need to plant a dmb
2385     return false;
2386   }
2387 
2388   // a volatile read derived from bytecode (or also from an inlined
2389   // SHA field read via LibraryCallKit::load_field_from_object)
2390   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2391   // with a bogus read dependency on its preceding load. so in those
2392   // cases we will find the load node at the PARMS offset of the
2393   // acquire membar.  n.b. there may be an intervening DecodeN node.
2394   //
2395   // a volatile load derived from an inlined unsafe field access
2396   // manifests as a cpuorder membar with Ctl and Mem projections
2397   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2398   // acquire then feeds another cpuorder membar via Ctl and Mem


2573           n->Opcode() == Op_MemBarRelease),
2574          "expecting a release membar");
2575 
2576   if (UseBarriersForVolatile) {
2577     // we need to plant a dmb
2578     return false;
2579   }
2580 
2581   // if there is a dependent CPUOrder barrier then use that as the
2582   // leading
2583 
2584   MemBarNode *barrier = n->as_MemBar();
2585   // check for an intervening cpuorder membar
2586   MemBarNode *b = child_membar(barrier);
2587   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2588     // ok, so start the check from the dependent cpuorder barrier
2589     barrier = b;
2590   }
2591 
2592   // must start with a normal feed
2593   MemBarNode *trailing = leading_to_trailing(barrier);
2594 
2595   return (trailing != NULL);
2596 }
2597 
2598 bool unnecessary_volatile(const Node *n)
2599 {
2600   // assert n->is_MemBar();
2601   if (UseBarriersForVolatile) {
2602     // we need to plant a dmb
2603     return false;
2604   }
2605 
2606   MemBarNode *mbvol = n->as_MemBar();
2607 
2608   // first we check if this is part of a card mark. if so then we have
2609   // to generate a StoreLoad barrier
2610 
2611   if (is_card_mark_membar(mbvol)) {
2612       return false;
2613   }
2614 
2615   // ok, if it's not a card mark then we still need to check if it is
2616   // a trailing membar of a volatile put graph.
2617 
2618   return (trailing_to_leading(mbvol) != NULL);
2619 }
2620 
2621 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2622 
2623 bool needs_releasing_store(const Node *n)
2624 {
2625   // assert n->is_Store();
2626   if (UseBarriersForVolatile) {
2627     // we use a normal store and dmb combination
2628     return false;
2629   }
2630 
2631   StoreNode *st = n->as_Store();
2632 
2633   // the store must be marked as releasing
2634   if (!st->is_release()) {
2635     return false;
2636   }


2646   ProjNode *proj = x->as_Proj();
2647 
2648   x = proj->lookup(0);
2649 
2650   if (!x || !x->is_MemBar()) {
2651     return false;
2652   }
2653 
2654   MemBarNode *barrier = x->as_MemBar();
2655 
2656   // if the barrier is a release membar or a cpuorder membar fed by a
2657   // release membar then we need to check whether that forms part of a
2658   // volatile put graph.
2659 
2660   // reject invalid candidates
2661   if (!leading_membar(barrier)) {
2662     return false;
2663   }
2664 
2665   // does this lead a normal subgraph?
2666   MemBarNode *trailing = leading_to_trailing(barrier);
2667 
2668   return (trailing != NULL);
2669 }
2670 
2671 // predicate controlling translation of CAS
2672 //
2673 // returns true if CAS needs to use an acquiring load otherwise false
2674 
2675 bool needs_acquiring_load_exclusive(const Node *n)
2676 {
2677   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2678   if (UseBarriersForVolatile) {
2679     return false;
2680   }
2681 
2682   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2683 #ifdef ASSERT
2684   LoadStoreNode *st = n->as_LoadStore();
2685 
2686   // the store must be fed by a membar
2687 
2688   Node *x = st->lookup(StoreNode::Memory);


2690   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2691 
2692   ProjNode *proj = x->as_Proj();
2693 
2694   x = proj->lookup(0);
2695 
2696   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2697 
2698   MemBarNode *barrier = x->as_MemBar();
2699 
2700   // the barrier must be a cpuorder membar fed by a release membar
2701 
2702   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2703          "CAS not fed by cpuorder membar!");
2704 
2705   MemBarNode *b = parent_membar(barrier);
2706   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2707           "CAS not fed by cpuorder+release membar pair!");
2708 
2709   // does this lead a normal subgraph?
2710   MemBarNode *mbar = leading_to_trailing(barrier);
2711 
2712   assert(mbar != NULL, "CAS not embedded in normal graph!");
2713 
2714   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2715 #endif // ASSERT
2716   // so we can just return true here
2717   return true;
2718 }
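
     // For reference (an editor's illustration, not the encoding used by
     // this file), an acquiring and releasing CAS on AArch64 is
     // conventionally built from a load/store-exclusive loop along the
     // lines of
     //
     //   retry:
     //     ldaxr x0, [x2]       ; acquiring exclusive load of the field
     //     cmp   x0, x3         ; compare against the expected value
     //     b.ne  done
     //     stlxr w1, x4, [x2]   ; releasing exclusive store of the new value
     //     cbnz  w1, retry      ; the exclusive monitor was lost, so retry
     //   done:
     //
     // the choice the predicate above controls is simply whether the
     // exclusive load is emitted as an acquiring ldaxr or a plain ldxr.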
2719 
2720 // predicate controlling translation of StoreCM
2721 //
2722 // returns true if the StoreStore barrier normally required before the
2723 // card write can be omitted, otherwise false
2724 
2725 bool unnecessary_storestore(const Node *storecm)
2726 {
2727   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2728 
2729   // we only ever need to generate a dmb ishst between an object put
2730   // and the associated card mark when we are using CMS without
2731   // conditional card marking. Any other occurrence of a StoreCM will be
2732   // when performing a card mark using CMS with conditional card marking
2733   // or G1. In those cases the preceding MemBarVolatile will be
2734   // translated to a dmb ish which guarantees visibility of the
2735   // preceding StoreN/P before this StoreCM
2736 
2737   if (!UseConcMarkSweepGC || UseCondCardMark) {
2738     return true;
2739   }
2740 
2741   // if we are implementing volatile puts using barriers then we must
2742   // insert the dmb ishst
2743 
2744   if (UseBarriersForVolatile) {
2745     return false;
2746   }
2747 
2748   // we must be using CMS without conditional card marking so we have to
2749   // generate the StoreStore
2750 
2751   return false;
2752 }
2753 
2754 
2755 #define __ _masm.
2756 
2757 // advance declarations for helper functions to convert register
2758 // indices to register objects
2759 
2760 // the ad file has to provide implementations of certain methods
2761 // expected by the generic code
2762 //
2763 // REQUIRED FUNCTIONALITY
2764 
2765 //=============================================================================
2766 
2767 // !!!!! Special hack to get all types of calls to specify the byte offset
2768 //       from the start of the call to the point where the return address
2769 //       will point.
2770 
2771 int MachCallStaticJavaNode::ret_addr_offset()