1176 // the predicates below for exact details). The card mark may be as
1177 // simple as a few extra nodes or, in a few GC configurations, may
1178 // include more complex control flow between the leading and
1179 // trailing memory barriers. However, whatever the card mark
1180 // configuration these signatures are unique to translated volatile
1181 // reads/stores -- they will not appear as a result of any other
1182 // bytecode translation or inlining nor as a consequence of
1183 // optimizing transforms.
1184 //
1185 // We also want to catch inlined unsafe volatile gets and puts and
1186 // be able to implement them using either ldar<x>/stlr<x> or some
1187 // combination of ldr<x>/stlr<x> and dmb instructions.
1188 //
1189 // Inlined unsafe volatiles puts manifest as a minor variant of the
1190 // normal volatile put node sequence containing an extra cpuorder
1191 // membar
1192 //
1193 // MemBarRelease
1194 // MemBarCPUOrder
1195 // StoreX[mo_release] {CardMark}-optional
1196 // MemBarVolatile
1197 //
1198 // n.b. as an aside, the cpuorder membar is not itself subject to
1199 // matching and translation by adlc rules. However, the rule
1200 // predicates need to detect its presence in order to correctly
1201 // select the desired adlc rules.
1202 //
1203 // Inlined unsafe volatile gets manifest as a somewhat different
1204 // node sequence to a normal volatile get
1205 //
1206 // MemBarCPUOrder
1207 // || \\
1208 // MemBarAcquire LoadX[mo_acquire]
1209 // ||
1210 // MemBarCPUOrder
1211 //
1212 // In this case the acquire membar does not directly depend on the
1213 // load. However, we can be sure that the load is generated from an
1214 // inlined unsafe volatile get if we see it dependent on this unique
1215 // sequence of membar nodes. Similarly, given an acquire membar we
1216 // can know that it was added because of an inlined unsafe volatile
1217 // get if it is fed and feeds a cpuorder membar and if its feed
1218 // membar also feeds an acquiring load.
1219 //
1220 // Finally an inlined (Unsafe) CAS operation is translated to the
1221 // following ideal graph
1222 //
1223 // MemBarRelease
1224 // MemBarCPUOrder
1225 // CompareAndSwapX {CardMark}-optional
1226 // MemBarCPUOrder
1227 // MemBarAcquire
1228 //
1229 // So, where we can identify these volatile read and write
1230 // signatures we can choose to plant either of the above two code
1297 }
1298
1299 membar = ctl->lookup(0);
1300
1301 if (!membar || !membar->is_MemBar()) {
1302 return NULL;
1303 }
1304
1305 if (mem->lookup(0) != membar) {
1306 return NULL;
1307 }
1308
1309 return membar->as_MemBar();
1310 }
1311
1312 // if n is linked to a child MemBarNode by intervening Control and
1313 // Memory ProjNodes return the MemBarNode otherwise return NULL.
1314
1315 MemBarNode *child_membar(const MemBarNode *n)
1316 {
1317 ProjNode *ctl = n->proj_out(TypeFunc::Control);
1318 ProjNode *mem = n->proj_out(TypeFunc::Memory);
1319
1320 // MemBar needs to have both a Ctl and Mem projection
1321 if (! ctl || ! mem)
1322 return NULL;
1323
1324 MemBarNode *child = NULL;
1325 Node *x;
1326
1327 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1328 x = ctl->fast_out(i);
1329 // if we see a membar we keep hold of it. we may also see a new
1330 // arena copy of the original but it will appear later
1331 if (x->is_MemBar()) {
1332 child = x->as_MemBar();
1333 break;
1334 }
1335 }
1336
1337 if (child == NULL) {
1338 return NULL;
1415
1416
1417 // 3) helper predicates to traverse volatile put or CAS graphs which
1418 // may contain GC barrier subgraphs
1419
1420 // Preamble
1421 // --------
1422 //
1423 // for volatile writes we can omit generating barriers and employ a
// releasing store when we see a node sequence with a
1425 // leading MemBarRelease and a trailing MemBarVolatile as follows
1426 //
1427 // MemBarRelease
1428 // { || } -- optional
1429 // {MemBarCPUOrder}
1430 // || \\
1431 // || StoreX[mo_release]
1432 // | \ /
1433 // | MergeMem
1434 // | /
1435 // MemBarVolatile
1436 //
1437 // where
1438 // || and \\ represent Ctl and Mem feeds via Proj nodes
1439 // | \ and / indicate further routing of the Ctl and Mem feeds
1440 //
1441 // this is the graph we see for non-object stores. however, for a
1442 // volatile Object store (StoreN/P) we may see other nodes below the
1443 // leading membar because of the need for a GC pre- or post-write
1444 // barrier.
1445 //
// with most GC configurations we will see this simple variant which
1447 // includes a post-write barrier card mark.
1448 //
1449 // MemBarRelease______________________________
1450 // || \\ Ctl \ \\
1451 // || StoreN/P[mo_release] CastP2X StoreB/CM
1452 // | \ / . . . /
1453 // | MergeMem
1454 // | /
1455 // || /
1456 // MemBarVolatile
1457 //
1458 // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1459 // the object address to an int used to compute the card offset) and
1460 // Ctl+Mem to a StoreB node (which does the actual card mark).
1461 //
1462 // n.b. a StoreCM node will only appear in this configuration when
1463 // using CMS. StoreCM differs from a normal card mark write (StoreB)
1464 // because it implies a requirement to order visibility of the card
1465 // mark (StoreCM) relative to the object put (StoreP/N) using a
1466 // StoreStore memory barrier (arguably this ought to be represented
1467 // explicitly in the ideal graph but that is not how it works). This
1468 // ordering is required for both non-volatile and volatile
1469 // puts. Normally that means we need to translate a StoreCM using
1470 // the sequence
1471 //
1472 // dmb ishst
1473 // stlrb
1474 //
1475 // However, in the case of a volatile put if we can recognise this
1488 // MemBarRelease____________________________________________
1489 // || \\ Ctl \ Ctl \ \\ Mem \
1490 // || StoreN/P[mo_release] CastP2X If LoadB |
1491 // | \ / \ |
1492 // | MergeMem . . . StoreB
1493 // | / /
1494 // || /
1495 // MemBarVolatile
1496 //
1497 // It is worth noting at this stage that both the above
1498 // configurations can be uniquely identified by checking that the
1499 // memory flow includes the following subgraph:
1500 //
1501 // MemBarRelease
1502 // {MemBarCPUOrder}
1503 // | \ . . .
1504 // | StoreX[mo_release] . . .
1505 // | /
1506 // MergeMem
1507 // |
1508 // MemBarVolatile
1509 //
1510 // This is referred to as a *normal* subgraph. It can easily be
1511 // detected starting from any candidate MemBarRelease,
1512 // StoreX[mo_release] or MemBarVolatile.
1513 //
1514 // A simple variation on this normal case occurs for an unsafe CAS
1515 // operation. The basic graph for a non-object CAS is
1516 //
1517 // MemBarRelease
1518 // ||
1519 // MemBarCPUOrder
1520 // || \\ . . .
1521 // || CompareAndSwapX
1522 // || |
1523 // || SCMemProj
1524 // | \ /
1525 // | MergeMem
1526 // | /
1527 // MemBarCPUOrder
1550 //
1551 // This graph can also easily be detected starting from any
1552 // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1553 //
1554 // the code below uses two helper predicates, leading_to_normal and
1555 // normal_to_leading to identify these normal graphs, one validating
1556 // the layout starting from the top membar and searching down and
1557 // the other validating the layout starting from the lower membar
1558 // and searching up.
1559 //
1560 // There are two special case GC configurations when a normal graph
1561 // may not be generated: when using G1 (which always employs a
1562 // conditional card mark); and when using CMS with conditional card
1563 // marking configured. These GCs are both concurrent rather than
// stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1565 // graph between the leading and trailing membar nodes, in
// particular enforcing stronger memory serialisation between the
1567 // object put and the corresponding conditional card mark. CMS
1568 // employs a post-write GC barrier while G1 employs both a pre- and
1569 // post-write GC barrier. Of course the extra nodes may be absent --
1570 // they are only inserted for object puts. This significantly
1571 // complicates the task of identifying whether a MemBarRelease,
1572 // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1573 // when using these GC configurations (see below). It adds similar
1574 // complexity to the task of identifying whether a MemBarRelease,
1575 // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1576 //
1577 // In both cases the post-write subtree includes an auxiliary
1578 // MemBarVolatile (StoreLoad barrier) separating the object put and
1579 // the read of the corresponding card. This poses two additional
1580 // problems.
1581 //
1582 // Firstly, a card mark MemBarVolatile needs to be distinguished
1583 // from a normal trailing MemBarVolatile. Resolving this first
1584 // problem is straightforward: a card mark MemBarVolatile always
1585 // projects a Mem feed to a StoreCM node and that is a unique marker
1586 //
1587 // MemBarVolatile (card mark)
1588 // C | \ . . .
1589 // | StoreCM . . .
1590 // . . .
1591 //
1592 // The second problem is how the code generator is to translate the
1593 // card mark barrier? It always needs to be translated to a "dmb
1594 // ish" instruction whether or not it occurs as part of a volatile
1595 // put. A StoreLoad barrier is needed after the object put to ensure
1596 // i) visibility to GC threads of the object put and ii) visibility
1597 // to the mutator thread of any card clearing write by a GC
1598 // thread. Clearly a normal store (str) will not guarantee this
1599 // ordering but neither will a releasing store (stlr). The latter
1621 // | Bot \ /
1622 // | MergeMem
1623 // | /
1624 // MemBarVolatile (card mark)
1625 // C | || M |
1626 // | LoadB |
1627 // | | |
1628 // | Cmp |\
1629 // | / | \
1630 // If | \
1631 // | \ | \
1632 // IfFalse IfTrue | \
1633 // \ / \ | \
1634 // \ / StoreCM |
1635 // \ / | |
1636 // Region . . . |
1637 // | \ /
1638 // | . . . \ / Bot
1639 // | MergeMem
1640 // | |
1641 // MemBarVolatile (trailing)
1642 //
1643 // The first MergeMem merges the AliasIdxBot Mem slice from the
1644 // leading membar and the oopptr Mem slice from the Store into the
1645 // card mark membar. The trailing MergeMem merges the AliasIdxBot
1646 // Mem slice from the card mark membar and the AliasIdxRaw slice
1647 // from the StoreCM into the trailing membar (n.b. the latter
1648 // proceeds via a Phi associated with the If region).
1649 //
1650 // The graph for a CAS varies slightly, the obvious difference being
1651 // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1652 // and the trailing MemBarVolatile by a MemBarCPUOrder +
1653 // MemBarAcquire pair. The other important difference is that the
1654 // CompareAndSwap node's SCMemProj is not merged into the card mark
1655 // membar - it still feeds the trailing MergeMem. This also means
1656 // that the card mark membar receives its Mem feed directly from the
1657 // leading membar rather than via a MergeMem.
1658 //
1659 // MemBarRelease
1660 // MemBarCPUOrder__(leading)_________________________
1661 // || \\ C \
1662 // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X
1663 // C | || M | |
1664 // | LoadB | ______/|
1665 // | | | / |
1666 // | Cmp | / SCMemProj
1667 // | / | / |
1668 // If | / /
1669 // | \ | / /
1670 // IfFalse IfTrue | / /
1671 // \ / \ |/ prec /
1672 // \ / StoreCM /
1673 // \ / | /
1674 // Region . . . /
1675 // | \ /
1676 // | . . . \ / Bot
1677 // | MergeMem
1678 // | |
1679 // MemBarCPUOrder
1680 // MemBarAcquire (trailing)
1681 //
1682 // This has a slightly different memory subgraph to the one seen
1683 // previously but the core of it is the same as for the CAS normal
// subgraph
1685 //
1686 // MemBarRelease
1687 // MemBarCPUOrder____
1688 // || \ . . .
1689 // MemBarVolatile CompareAndSwapX . . .
1690 // | \ |
1691 // . . . SCMemProj
1692 // | / . . .
1693 // MergeMem
1694 // |
1695 // MemBarCPUOrder
1696 // MemBarAcquire
1697 //
1698 //
1699 // G1 is quite a lot more complicated. The nodes inserted on behalf
1700 // of G1 may comprise: a pre-write graph which adds the old value to
1701 // the SATB queue; the releasing store itself; and, finally, a
1702 // post-write graph which performs a card mark.
1703 //
1704 // The pre-write graph may be omitted, but only when the put is
1705 // writing to a newly allocated (young gen) object and then only if
1706 // there is a direct memory chain to the Initialize node for the
1707 // object allocation. This will not happen for a volatile put since
1708 // any memory chain passes through the leading membar.
1709 //
1710 // The pre-write graph includes a series of 3 If tests. The outermost
1711 // If tests whether SATB is enabled (no else case). The next If tests
1712 // whether the old value is non-NULL (no else case). The third tests
1713 // whether the SATB queue index is > 0, if so updating the queue. The
1714 // else case for this third If calls out to the runtime to allocate a
1715 // new queue buffer.
1716 //
1725 // | \ | \ \
1726 // IfFalse IfTrue | \ \
1727 // | | | \ |
1728 // | If | /\ |
1729 // | | \ |
1730 // | \ |
1731 // | . . . \ |
1732 // | / | / | |
1733 // Region Phi[M] | |
1734 // | \ | | |
1735 // | \_____ | ___ | |
1736 // C | C \ | C \ M | |
1737 // | CastP2X | StoreN/P[mo_release] |
1738 // | | | |
1739 // C | M | M | M |
1740 // \ | | /
1741 // . . .
1742 // (post write subtree elided)
1743 // . . .
1744 // C \ M /
1745 // MemBarVolatile (trailing)
1746 //
1747 // n.b. the LoadB in this subgraph is not the card read -- it's a
1748 // read of the SATB queue active flag.
1749 //
1750 // Once again the CAS graph is a minor variant on the above with the
// expected substitutions of CompareAndSwapX for StoreN/P and
1752 // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1753 //
1754 // The G1 post-write subtree is also optional, this time when the
1755 // new value being written is either null or can be identified as a
1756 // newly allocated (young gen) object with no intervening control
1757 // flow. The latter cannot happen but the former may, in which case
// the card mark membar is omitted and the memory feeds from the
// leading membar and the StoreN/P are merged direct into the
1760 // trailing membar as per the normal subgraph. So, the only special
1761 // case which arises is when the post-write subgraph is generated.
1762 //
1763 // The kernel of the post-write G1 subgraph is the card mark itself
1764 // which includes a card mark memory barrier (MemBarVolatile), a
1765 // card test (LoadB), and a conditional update (If feeding a
1766 // StoreCM). These nodes are surrounded by a series of nested Ifs
1767 // which try to avoid doing the card mark. The top level If skips if
1768 // the object reference does not cross regions (i.e. it tests if
1769 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1770 // need not be recorded. The next If, which skips on a NULL value,
1771 // may be absent (it is not generated if the type of value is >=
1772 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1773 // checking if card_val != young). n.b. although this test requires
1774 // a pre-read of the card it can safely be done before the StoreLoad
1775 // barrier. However that does not bypass the need to reread the card
1776 // after the barrier.
1777 //
1778 // (pre-write subtree elided)
1779 // . . . . . . . . . . . .
1780 // C | M | M | M |
1781 // Region Phi[M] StoreN |
1782 // | / \ | |
1783 // / \_______ / \ | |
1784 // C / C \ . . . \ | |
1785 // If CastP2X . . . | | |
1786 // / \ | | |
1787 // / \ | | |
1788 // IfFalse IfTrue | | |
1789 // | | | | /|
1790 // | If | | / |
1791 // | / \ | | / |
1792 // | / \ \ | / |
1793 // | IfFalse IfTrue MergeMem |
1794 // | . . . / \ / |
1795 // | / \ / |
1796 // | IfFalse IfTrue / |
1809 // | . . . | | |
1810 // | \ | | /
1811 // | StoreCM | /
1812 // | . . . | /
1813 // | _________/ /
1814 // | / _____________/
1815 // | . . . . . . | / /
1816 // | | | / _________/
1817 // | | Phi[M] / /
1818 // | | | / /
1819 // | | | / /
1820 // | Region . . . Phi[M] _____/
1821 // | / | /
1822 // | | /
1823 // | . . . . . . | /
1824 // | / | /
1825 // Region | | Phi[M]
1826 // | | | / Bot
1827 // \ MergeMem
1828 // \ /
1829 // MemBarVolatile
1830 //
1831 // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1832 // from the leading membar and the oopptr Mem slice from the Store
1833 // into the card mark membar i.e. the memory flow to the card mark
1834 // membar still looks like a normal graph.
1835 //
1836 // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1837 // Mem slices (from the StoreCM and other card mark queue stores).
1838 // However in this case the AliasIdxBot Mem slice does not come
1839 // direct from the card mark membar. It is merged through a series
1840 // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1841 // from the leading membar with the Mem feed from the card mark
1842 // membar. Each Phi corresponds to one of the Ifs which may skip
1843 // around the card mark membar. So when the If implementing the NULL
1844 // value check has been elided the total number of Phis is 2
1845 // otherwise it is 3.
1846 //
1847 // The CAS graph when using G1GC also includes a pre-write subgraph
// and an optional post-write subgraph. The same variations are
1849 // introduced as for CMS with conditional card marking i.e. the
// StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1851 // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1852 // Mem feed from the CompareAndSwapP/N includes a precedence
1853 // dependency feed to the StoreCM and a feed via an SCMemProj to the
1854 // trailing membar. So, as before the configuration includes the
1855 // normal CAS graph as a subgraph of the memory flow.
1856 //
1857 // So, the upshot is that in all cases the volatile put graph will
// include a *normal* memory subgraph between the leading membar and
1859 // its child membar, either a volatile put graph (including a
1860 // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1861 // When that child is not a card mark membar then it marks the end
1862 // of the volatile put or CAS subgraph. If the child is a card mark
1863 // membar then the normal subgraph will form part of a volatile put
1864 // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1865 // to a trailing barrier via a MergeMem. That feed is either direct
1866 // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1867 // memory flow (for G1).
1868 //
1869 // The predicates controlling generation of instructions for store
1870 // and barrier nodes employ a few simple helper functions (described
1871 // below) which identify the presence or absence of all these
1872 // subgraph configurations and provide a means of traversing from
1873 // one node in the subgraph to another.
1874
1875 // is_CAS(int opcode)
1876 //
1877 // return true if opcode is one of the possible CompareAndSwapX
1878 // values otherwise false.
1879
1880 bool is_CAS(int opcode)
1881 {
1882 switch(opcode) {
1883 // We handle these
1884 case Op_CompareAndSwapI:
1885 case Op_CompareAndSwapL:
1886 case Op_CompareAndSwapP:
1887 case Op_CompareAndSwapN:
1890 return true;
1891 // These are TBD
1892 case Op_WeakCompareAndSwapB:
1893 case Op_WeakCompareAndSwapS:
1894 case Op_WeakCompareAndSwapI:
1895 case Op_WeakCompareAndSwapL:
1896 case Op_WeakCompareAndSwapP:
1897 case Op_WeakCompareAndSwapN:
1898 case Op_CompareAndExchangeB:
1899 case Op_CompareAndExchangeS:
1900 case Op_CompareAndExchangeI:
1901 case Op_CompareAndExchangeL:
1902 case Op_CompareAndExchangeP:
1903 case Op_CompareAndExchangeN:
1904 return false;
1905 default:
1906 return false;
1907 }
1908 }
1909
1910
1911 // leading_to_normal
1912 //
// graph traversal helper which detects the normal case Mem feed from
1914 // a release membar (or, optionally, its cpuorder child) to a
1915 // dependent volatile membar i.e. it ensures that one or other of
1916 // the following Mem flow subgraph is present.
1917 //
1918 // MemBarRelease
1919 // MemBarCPUOrder {leading}
1920 // | \ . . .
1921 // | StoreN/P[mo_release] . . .
1922 // | /
1923 // MergeMem
1924 // |
1925 // MemBarVolatile {trailing or card mark}
1926 //
1927 // MemBarRelease
1928 // MemBarCPUOrder {leading}
1929 // | \ . . .
1930 // | CompareAndSwapX . . .
1931 // |
1932 // . . . SCMemProj
1933 // \ |
1934 // | MergeMem
1935 // | /
1936 // MemBarCPUOrder
1937 // MemBarAcquire {trailing}
1938 //
1939 // if the correct configuration is present returns the trailing
1940 // membar otherwise NULL.
1941 //
1942 // the input membar is expected to be either a cpuorder membar or a
1943 // release membar. in the latter case it should not have a cpu membar
1944 // child.
1945 //
1946 // the returned value may be a card mark or trailing membar
1947 //
1948
MemBarNode *leading_to_normal(MemBarNode *leading)
{
  // precondition: the caller supplies either the MemBarRelease at the
  // top of a candidate volatile put/CAS subgraph or its cpuorder child
  assert((leading->Opcode() == Op_MemBarRelease ||
          leading->Opcode() == Op_MemBarCPUOrder),
         "expecting a volatile or cpuroder membar!");

  // check the mem flow
  //
  // NOTE(review): the declarations of x, mm, st and cas and the
  // opening of the loop which iterates over the leading membar's Mem
  // projection outputs are not visible in this chunk -- the code
  // below resumes part way through that loop, which records at most
  // one MergeMem, one releasing store and one CAS user of the Mem
  // projection.
      mm = x->as_MergeMem();
    } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two releasing stores/CAS nodes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      // likewise at most one CAS node is allowed
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // must have a store or a cas
  if (!st && !cas) {
    return NULL;
  }

  // must have a merge if we also have st
  if (st && !mm) {
    return NULL;
  }

  Node *y = NULL;
  if (cas) {
    // look for an SCMemProj feeding out of the CAS
    for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
      x = cas->fast_out(i);
      if (x->is_Proj()) {
        y = x;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    // the proj must feed a MergeMem
    for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
      x = y->fast_out(i);
      if (x->is_MergeMem()) {
        mm = x->as_MergeMem();
        break;
      }
    }
    if (mm == NULL)
      return NULL;
  } else {
    // ensure the store feeds the existing mergemem;
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm) {
        y = st;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
  }

  MemBarNode *mbar = NULL;
  // ensure the merge feeds to the expected type of membar
  // n.b. only the first MemBar output of the merge is considered --
  // the loop breaks whether or not that membar matched
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x->is_MemBar()) {
      int opcode = x->Opcode();
      if (opcode == Op_MemBarVolatile && st) {
        // store case: the MemBarVolatile is itself the trailing membar
        mbar = x->as_MemBar();
      } else if (cas && opcode == Op_MemBarCPUOrder) {
        // CAS case: expect the cpuorder membar's child to be the
        // trailing MemBarAcquire
        MemBarNode *y = x->as_MemBar(); // n.b. shadows the outer y
        y = child_membar(y);
        if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
          mbar = y;
        }
      }
      break;
    }
  }

  // NULL if no valid trailing (or card mark) membar was found
  return mbar;
}
2056
2057 // normal_to_leading
2058 //
2059 // graph traversal helper which detects the normal case Mem feed
2060 // from either a card mark or a trailing membar to a preceding
2061 // release membar (optionally its cpuorder child) i.e. it ensures
2062 // that one or other of the following Mem flow subgraphs is present.
2063 //
2064 // MemBarRelease
2065 // MemBarCPUOrder {leading}
2066 // | \ . . .
2067 // | StoreN/P[mo_release] . . .
2068 // | /
2069 // MergeMem
2070 // |
2071 // MemBarVolatile {card mark or trailing}
2072 //
2073 // MemBarRelease
2074 // MemBarCPUOrder {leading}
2075 // | \ . . .
2076 // | CompareAndSwapX . . .
2077 // |
2078 // . . . SCMemProj
2079 // \ |
2080 // | MergeMem
2081 // | /
2082 // MemBarCPUOrder
2083 // MemBarAcquire {trailing}
2084 //
2085 // this predicate checks for the same flow as the previous predicate
2086 // but starting from the bottom rather than the top.
2087 //
// if the configuration is present returns the cpuorder membar for
2089 // preference or when absent the release membar otherwise NULL.
2090 //
2091 // n.b. the input membar is expected to be a MemBarVolatile but
2092 // need not be a card mark membar.
2093
MemBarNode *normal_to_leading(const MemBarNode *barrier)
{
  // input must be a volatile membar
  assert((barrier->Opcode() == Op_MemBarVolatile ||
          barrier->Opcode() == Op_MemBarAcquire),
         "expecting a volatile or an acquire membar");
  Node *x;
  // an acquire membar marks the trailing end of a CAS subgraph
  bool is_cas = barrier->Opcode() == Op_MemBarAcquire;

  // if we have an acquire membar then it must be fed via a CPUOrder
  // membar

  if (is_cas) {
    // skip to parent barrier which must be a cpuorder
    x = parent_membar(barrier);
    if (x->Opcode() != Op_MemBarCPUOrder)
      return NULL;
  } else {
    // start from the supplied barrier
    x = (Node *)barrier;
  }

  // the Mem feed to the membar should be a merge
  x = x ->in(TypeFunc::Memory);
  if (!x->is_MergeMem())
    return NULL;

  MergeMemNode *mm = x->as_MergeMem();

  if (is_cas) {
    // the merge should be fed from the CAS via an SCMemProj node
    x = NULL;
    for (uint idx = 1; idx < mm->req(); idx++) {
      if (mm->in(idx)->Opcode() == Op_SCMemProj) {
        x = mm->in(idx);
        break;
      }
    }
    if (x == NULL) {
      return NULL;
    }
    // check for a CAS feeding this proj
    x = x->in(0);
    int opcode = x->Opcode();
    if (!is_CAS(opcode)) {
      return NULL;
    }
    // the CAS should get its mem feed from the leading membar
    x = x->in(MemNode::Memory);
  } else {
    // the merge should get its Bottom mem feed from the leading membar
    x = mm->in(Compile::AliasIdxBot);
  }

  // ensure this is a non control projection
  if (!x->is_Proj() || x->is_CFG()) {
    return NULL;
  }
  // if it is fed by a membar that's the one we want
  x = x->in(0);

  if (!x->is_MemBar()) {
    return NULL;
  }

  MemBarNode *leading = x->as_MemBar();
  // reject invalid candidates
  if (!leading_membar(leading)) {
    return NULL;
  }

  // ok, we have a leading membar, now for the sanity clauses

  // NOTE(review): the declarations of mem (presumably the leading
  // membar's Mem projection), st and cas are not visible in this
  // chunk -- the loop below resumes with those names already in
  // scope. it records at most one releasing store and one CAS among
  // the projection's users.
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two stores or CASes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // the leading membar must feed at least one of a store or a cas
  // NOTE(review): bitwise & where && was presumably intended -- the
  // result is the same for these NULL-comparison operands but the
  // expression does not short-circuit
  if (st == NULL & cas == NULL) {
    return NULL;
  }

  if (st == NULL) {
    // nothing more to check
    return leading;
  } else {
    // we should not have a store if we started from an acquire
    if (is_cas) {
      return NULL;
    }

    // the store should feed the merge we used to get here
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm) {
        return leading;
      }
    }
  }

  // store did not feed the merge -- not the expected configuration
  return NULL;
}
2211
2212 // card_mark_to_trailing
2213 //
2214 // graph traversal helper which detects extra, non-normal Mem feed
2215 // from a card mark volatile membar to a trailing membar i.e. it
2216 // ensures that one of the following three GC post-write Mem flow
2217 // subgraphs is present.
2218 //
2219 // 1)
2220 // . . .
2221 // |
2222 // MemBarVolatile (card mark)
2223 // | |
2224 // | StoreCM
2225 // | |
2226 // | . . .
2227 // Bot | /
2228 // MergeMem
2229 // |
2230 // |
2231 // MemBarVolatile {trailing}
2232 //
2233 // 2)
2234 // MemBarRelease/CPUOrder (leading)
2235 // |
2236 // |
2237 // |\ . . .
2238 // | \ |
2239 // | \ MemBarVolatile (card mark)
2240 // | \ | |
2241 // \ \ | StoreCM . . .
2242 // \ \ |
2243 // \ Phi
2244 // \ /
2245 // Phi . . .
2246 // Bot | /
2247 // MergeMem
2248 // |
2249 // MemBarVolatile {trailing}
2250 //
2251 //
2252 // 3)
2253 // MemBarRelease/CPUOrder (leading)
2254 // |
2255 // |\
2256 // | \
2257 // | \ . . .
2258 // | \ |
2259 // |\ \ MemBarVolatile (card mark)
2260 // | \ \ | |
2261 // | \ \ | StoreCM . . .
2262 // | \ \ |
2263 // \ \ Phi
2264 // \ \ /
2265 // \ Phi
2266 // \ /
2267 // Phi . . .
2268 // Bot | /
2269 // MergeMem
2270 // |
2271 // |
2272 // MemBarVolatile {trailing}
2273 //
2274 // configuration 1 is only valid if UseConcMarkSweepGC &&
2275 // UseCondCardMark
2276 //
2277 // configurations 2 and 3 are only valid if UseG1GC.
2278 //
2279 // if a valid configuration is present returns the trailing membar
2280 // otherwise NULL.
2281 //
2282 // n.b. the supplied membar is expected to be a card mark
2283 // MemBarVolatile i.e. the caller must ensure the input node has the
2284 // correct operand and feeds Mem to a StoreCM node
2285
2286 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2287 {
2288 // input must be a card mark volatile membar
2289 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2290
2291 Node *feed = barrier->proj_out(TypeFunc::Memory);
2292 Node *x;
2293 MergeMemNode *mm = NULL;
2294
2295 const int MAX_PHIS = 3; // max phis we will search through
2296 int phicount = 0; // current search count
2297
2298 bool retry_feed = true;
2299 while (retry_feed) {
2300 // see if we have a direct MergeMem feed
2301 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2302 x = feed->fast_out(i);
2303 // the correct Phi will be merging a Bot memory slice
2304 if (x->is_MergeMem()) {
2305 mm = x->as_MergeMem();
2306 break;
2307 }
2308 }
2309 if (mm) {
2310 retry_feed = false;
2311 } else if (UseG1GC & phicount++ < MAX_PHIS) {
2312 // the barrier may feed indirectly via one or two Phi nodes
2313 PhiNode *phi = NULL;
2314 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2315 x = feed->fast_out(i);
2316 // the correct Phi will be merging a Bot memory slice
2317 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2318 phi = x->as_Phi();
2319 break;
2320 }
2321 }
2322 if (!phi) {
2323 return NULL;
2324 }
2325 // look for another merge below this phi
2326 feed = phi;
2327 } else {
2328 // couldn't find a merge
2329 return NULL;
2330 }
2331 }
2332
2333 // sanity check this feed turns up as the expected slice
2334 assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2335
2336 MemBarNode *trailing = NULL;
2337 // be sure we have a trailing membar the merge
2338 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2339 x = mm->fast_out(i);
2340 if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2341 trailing = x->as_MemBar();
2342 break;
2343 }
2344 }
2345
2346 return trailing;
2347 }
2348
2349 // trailing_to_card_mark
2350 //
2351 // graph traversal helper which detects extra, non-normal Mem feed
2352 // from a trailing volatile membar to a preceding card mark volatile
2353 // membar i.e. it identifies whether one of the three possible extra
2354 // GC post-write Mem flow subgraphs is present
2355 //
2356 // this predicate checks for the same flow as the previous predicate
2357 // but starting from the bottom rather than the top.
2358 //
2359 // if the configuration is present returns the card mark membar
2360 // otherwise NULL
2361 //
2362 // n.b. the supplied membar is expected to be a trailing
2363 // MemBarVolatile i.e. the caller must ensure the input node has the
2364 // correct opcode
2365
2366 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2367 {
2368 assert(trailing->Opcode() == Op_MemBarVolatile,
2369 "expecting a volatile membar");
2370 assert(!is_card_mark_membar(trailing),
2371 "not expecting a card mark membar");
2372
2373 // the Mem feed to the membar should be a merge
2374 Node *x = trailing->in(TypeFunc::Memory);
2375 if (!x->is_MergeMem()) {
2376 return NULL;
2377 }
2378
2379 MergeMemNode *mm = x->as_MergeMem();
2380
2381 x = mm->in(Compile::AliasIdxBot);
2382 // with G1 we may possibly see a Phi or two before we see a Memory
2383 // Proj from the card mark membar
2384
2385 const int MAX_PHIS = 3; // max phis we will search through
2386 int phicount = 0; // current search count
2387
2388 bool retry_feed = !x->is_Proj();
2389
2390 while (retry_feed) {
2391 if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2392 PhiNode *phi = x->as_Phi();
2393 ProjNode *proj = NULL;
2394 PhiNode *nextphi = NULL;
2395 bool found_leading = false;
2396 for (uint i = 1; i < phi->req(); i++) {
2397 x = phi->in(i);
2398 if (x->is_Phi()) {
2399 nextphi = x->as_Phi();
2400 } else if (x->is_Proj()) {
2401 int opcode = x->in(0)->Opcode();
2402 if (opcode == Op_MemBarVolatile) {
2403 proj = x->as_Proj();
2404 } else if (opcode == Op_MemBarRelease ||
2405 opcode == Op_MemBarCPUOrder) {
2406 // probably a leading membar
2407 found_leading = true;
2408 }
2409 }
2410 }
2411 // if we found a correct looking proj then retry from there
2412 // otherwise we must see a leading and a phi or this the
2413 // wrong config
2414 if (proj != NULL) {
2415 x = proj;
2416 retry_feed = false;
2417 } else if (found_leading && nextphi != NULL) {
2418 // retry from this phi to check phi2
2458 //
2459 // n.b. the input membar is expected to be either a volatile or
2460 // acquire membar but in the former case must *not* be a card mark
2461 // membar.
2462
2463 MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2464 {
2465 assert((trailing->Opcode() == Op_MemBarAcquire ||
2466 trailing->Opcode() == Op_MemBarVolatile),
2467 "expecting an acquire or volatile membar");
2468 assert((trailing->Opcode() != Op_MemBarVolatile ||
2469 !is_card_mark_membar(trailing)),
2470 "not expecting a card mark membar");
2471
2472 MemBarNode *leading = normal_to_leading(trailing);
2473
2474 if (leading) {
2475 return leading;
2476 }
2477
2478 // nothing more to do if this is an acquire
2479 if (trailing->Opcode() == Op_MemBarAcquire) {
2480 return NULL;
2481 }
2482
2483 MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2484
2485 if (!card_mark_membar) {
2486 return NULL;
2487 }
2488
2489 return normal_to_leading(card_mark_membar);
2490 }
2491
2492 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2493
2494 bool unnecessary_acquire(const Node *barrier)
2495 {
2496 assert(barrier->is_MemBar(), "expecting a membar");
2497
2498 if (UseBarriersForVolatile) {
2499 // we need to plant a dmb
2500 return false;
2501 }
2502
2503 // a volatile read derived from bytecode (or also from an inlined
2504 // SHA field read via LibraryCallKit::load_field_from_object)
2505 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2506 // with a bogus read dependency on it's preceding load. so in those
2507 // cases we will find the load node at the PARMS offset of the
2508 // acquire membar. n.b. there may be an intervening DecodeN node.
2509 //
2510 // a volatile load derived from an inlined unsafe field access
2511 // manifests as a cpuorder membar with Ctl and Mem projections
2512 // feeding both an acquire membar and a LoadX[mo_acquire]. The
2513 // acquire then feeds another cpuorder membar via Ctl and Mem
2514 // projections. The load has no output dependency on these trailing
2515 // membars because subsequent nodes inserted into the graph take
2516 // their control feed from the final membar cpuorder meaning they
2517 // are all ordered after the load.
2518
2519 Node *x = barrier->lookup(TypeFunc::Parms);
2520 if (x) {
2521 // we are starting from an acquire and it has a fake dependency
2522 //
2523 // need to check for
2524 //
2525 // LoadX[mo_acquire]
2526 // { |1 }
2527 // {DecodeN}
2528 // |Parms
2529 // MemBarAcquire*
2530 //
2531 // where * tags node we were passed
2532 // and |k means input k
2533 if (x->is_DecodeNarrowPtr()) {
2534 x = x->in(1);
2535 }
2536
2537 return (x->is_Load() && x->as_Load()->is_acquire());
2538 }
2539
2540 // now check for an unsafe volatile get
2541
2542 // need to check for
2543 //
2544 // MemBarCPUOrder
2545 // || \\
2546 // MemBarAcquire* LoadX[mo_acquire]
2547 // ||
2548 // MemBarCPUOrder
2549 //
2550 // where * tags node we were passed
2551 // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2552
2553 // check for a parent MemBarCPUOrder
2554 ProjNode *ctl;
2555 ProjNode *mem;
2556 MemBarNode *parent = parent_membar(barrier);
2557 if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2558 return false;
2559 ctl = parent->proj_out(TypeFunc::Control);
2560 mem = parent->proj_out(TypeFunc::Memory);
2561 if (!ctl || !mem) {
2562 return false;
2563 }
2564 // ensure the proj nodes both feed a LoadX[mo_acquire]
2565 LoadNode *ld = NULL;
2566 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2567 x = ctl->fast_out(i);
2568 // if we see a load we keep hold of it and stop searching
2569 if (x->is_Load()) {
2570 ld = x->as_Load();
2571 break;
2572 }
2573 }
2574 // it must be an acquiring load
2575 if (ld && ld->is_acquire()) {
2576
2577 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2578 x = mem->fast_out(i);
2579 // if we see the same load we drop it and stop searching
2580 if (x == ld) {
2581 ld = NULL;
2582 break;
2583 }
2584 }
2585 // we must have dropped the load
2586 if (ld == NULL) {
2587 // check for a child cpuorder membar
2588 MemBarNode *child = child_membar(barrier->as_MemBar());
2589 if (child && child->Opcode() == Op_MemBarCPUOrder)
2590 return true;
2591 }
2592 }
2593
2594 // final option for unnecessary mebar is that it is a trailing node
2595 // belonging to a CAS
2596
2597 MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2598
2599 return leading != NULL;
2600 }
2601
2602 bool needs_acquiring_load(const Node *n)
2603 {
2604 assert(n->is_Load(), "expecting a load");
2605 if (UseBarriersForVolatile) {
2606 // we use a normal load and a dmb
2607 return false;
2608 }
2609
2610 LoadNode *ld = n->as_Load();
2611
2612 if (!ld->is_acquire()) {
2613 return false;
2614 }
2630 // if we hit a DecodeNarrowPtr we reset the start node and restart
2631 // the search through the outputs
2632 restart:
2633
2634 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2635 Node *x = start->fast_out(i);
2636 if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2637 mbacq = x;
2638 } else if (!mbacq &&
2639 (x->is_DecodeNarrowPtr() ||
2640 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2641 start = x;
2642 goto restart;
2643 }
2644 }
2645
2646 if (mbacq) {
2647 return true;
2648 }
2649
2650 // now check for an unsafe volatile get
2651
2652 // check if Ctl and Proj feed comes from a MemBarCPUOrder
2653 //
2654 // MemBarCPUOrder
2655 // || \\
2656 // MemBarAcquire* LoadX[mo_acquire]
2657 // ||
2658 // MemBarCPUOrder
2659
2660 MemBarNode *membar;
2661
2662 membar = parent_membar(ld);
2663
2664 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2665 return false;
2666 }
2667
2668 // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2669
2670 membar = child_membar(membar);
2671
2672 if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2673 return false;
2674 }
2675
2676 membar = child_membar(membar);
2677
2678 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2679 return false;
2680 }
2681
2682 return true;
2683 }
2684
2685 bool unnecessary_release(const Node *n)
2686 {
2687 assert((n->is_MemBar() &&
2688 n->Opcode() == Op_MemBarRelease),
2689 "expecting a release membar");
2690
2691 if (UseBarriersForVolatile) {
2692 // we need to plant a dmb
2693 return false;
2694 }
2695
2696 // if there is a dependent CPUOrder barrier then use that as the
2697 // leading
2698
2699 MemBarNode *barrier = n->as_MemBar();
2700 // check for an intervening cpuorder membar
2701 MemBarNode *b = child_membar(barrier);
2702 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2722 }
2723
2724 bool unnecessary_volatile(const Node *n)
2725 {
2726 // assert n->is_MemBar();
2727 if (UseBarriersForVolatile) {
2728 // we need to plant a dmb
2729 return false;
2730 }
2731
2732 MemBarNode *mbvol = n->as_MemBar();
2733
2734 // first we check if this is part of a card mark. if so then we have
2735 // to generate a StoreLoad barrier
2736
2737 if (is_card_mark_membar(mbvol)) {
2738 return false;
2739 }
2740
2741 // ok, if it's not a card mark then we still need to check if it is
2742 // a trailing membar of a volatile put hgraph.
2743
2744 return (trailing_to_leading(mbvol) != NULL);
2745 }
2746
2747 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2748
2749 bool needs_releasing_store(const Node *n)
2750 {
2751 // assert n->is_Store();
2752 if (UseBarriersForVolatile) {
2753 // we use a normal store and dmb combination
2754 return false;
2755 }
2756
2757 StoreNode *st = n->as_Store();
2758
2759 // the store must be marked as releasing
2760 if (!st->is_release()) {
2761 return false;
2762 }
2830
2831 x = proj->lookup(0);
2832
2833 assert (x && x->is_MemBar(), "CAS not fed by membar!");
2834
2835 MemBarNode *barrier = x->as_MemBar();
2836
2837 // the barrier must be a cpuorder mmebar fed by a release membar
2838
2839 assert(barrier->Opcode() == Op_MemBarCPUOrder,
2840 "CAS not fed by cpuorder membar!");
2841
2842 MemBarNode *b = parent_membar(barrier);
2843 assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2844 "CAS not fed by cpuorder+release membar pair!");
2845
2846 // does this lead a normal subgraph?
2847 MemBarNode *mbar = leading_to_normal(barrier);
2848
2849 assert(mbar != NULL, "CAS not embedded in normal graph!");
2850
2851 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2852 #endif // ASSERT
2853 // so we can just return true here
2854 return true;
2855 }
2856
2857 // predicate controlling translation of StoreCM
2858 //
2859 // returns true if a StoreStore must precede the card write otherwise
2860 // false
2861
2862 bool unnecessary_storestore(const Node *storecm)
2863 {
2864 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2865
2866 // we only ever need to generate a dmb ishst between an object put
2867 // and the associated card mark when we are using CMS without
2868 // conditional card marking
2869
|
1176 // the predicates below for exact details). The card mark may be as
1177 // simple as a few extra nodes or, in a few GC configurations, may
1178 // include more complex control flow between the leading and
1179 // trailing memory barriers. However, whatever the card mark
1180 // configuration these signatures are unique to translated volatile
1181 // reads/stores -- they will not appear as a result of any other
1182 // bytecode translation or inlining nor as a consequence of
1183 // optimizing transforms.
1184 //
1185 // We also want to catch inlined unsafe volatile gets and puts and
1186 // be able to implement them using either ldar<x>/stlr<x> or some
1187 // combination of ldr<x>/stlr<x> and dmb instructions.
1188 //
1189 // Inlined unsafe volatiles puts manifest as a minor variant of the
1190 // normal volatile put node sequence containing an extra cpuorder
1191 // membar
1192 //
1193 // MemBarRelease
1194 // MemBarCPUOrder
1195 // StoreX[mo_release] {CardMark}-optional
1196 // MemBarCPUOrder
1197 // MemBarVolatile
1198 //
1199 // n.b. as an aside, a cpuorder membar is not itself subject to
1200 // matching and translation by adlc rules. However, the rule
1201 // predicates need to detect its presence in order to correctly
1202 // select the desired adlc rules.
1203 //
1204 // Inlined unsafe volatile gets manifest as a slightly different
1205 // node sequence to a normal volatile get because of the
1206 // introduction of some CPUOrder memory barriers to bracket the
1206 // Load. However, the same basic skeleton of a LoadX feeding a
1207 // MemBarAcquire, possibly through an optional DecodeN, is still
1209 // present
1210 //
1211 // MemBarCPUOrder
1212 // || \\
1213 // MemBarCPUOrder LoadX[mo_acquire]
1214 // || |
1215 // || {DecodeN} optional
1216 // || /
1217 // MemBarAcquire
1218 //
1219 // In this case the acquire membar does not directly depend on the
1220 // load. However, we can be sure that the load is generated from an
1221 // inlined unsafe volatile get if we see it dependent on this unique
1222 // sequence of membar nodes. Similarly, given an acquire membar we
1223 // can know that it was added because of an inlined unsafe volatile
1224 // get if it is fed and feeds a cpuorder membar and if its feed
1225 // membar also feeds an acquiring load.
1226 //
1227 // Finally an inlined (Unsafe) CAS operation is translated to the
1228 // following ideal graph
1229 //
1230 // MemBarRelease
1231 // MemBarCPUOrder
1232 // CompareAndSwapX {CardMark}-optional
1233 // MemBarCPUOrder
1234 // MemBarAcquire
1235 //
1236 // So, where we can identify these volatile read and write
1237 // signatures we can choose to plant either of the above two code
1304 }
1305
1306 membar = ctl->lookup(0);
1307
1308 if (!membar || !membar->is_MemBar()) {
1309 return NULL;
1310 }
1311
1312 if (mem->lookup(0) != membar) {
1313 return NULL;
1314 }
1315
1316 return membar->as_MemBar();
1317 }
1318
1319 // if n is linked to a child MemBarNode by intervening Control and
1320 // Memory ProjNodes return the MemBarNode otherwise return NULL.
1321
1322 MemBarNode *child_membar(const MemBarNode *n)
1323 {
1324 ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
1325 ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);
1326
1327 // MemBar needs to have both a Ctl and Mem projection
1328 if (! ctl || ! mem)
1329 return NULL;
1330
1331 MemBarNode *child = NULL;
1332 Node *x;
1333
1334 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1335 x = ctl->fast_out(i);
1336 // if we see a membar we keep hold of it. we may also see a new
1337 // arena copy of the original but it will appear later
1338 if (x->is_MemBar()) {
1339 child = x->as_MemBar();
1340 break;
1341 }
1342 }
1343
1344 if (child == NULL) {
1345 return NULL;
1422
1423
1424 // 3) helper predicates to traverse volatile put or CAS graphs which
1425 // may contain GC barrier subgraphs
1426
1427 // Preamble
1428 // --------
1429 //
1430 // for volatile writes we can omit generating barriers and employ a
1431 // releasing store when we see a node sequence with a
1432 // leading MemBarRelease and a trailing MemBarVolatile as follows
1433 //
1434 // MemBarRelease
1435 // { || } -- optional
1436 // {MemBarCPUOrder}
1437 // || \\
1438 // || StoreX[mo_release]
1439 // | \ /
1440 // | MergeMem
1441 // | /
1442 // {MemBarCPUOrder} -- optional
1443 // { || }
1444 // MemBarVolatile
1445 //
1446 // where
1447 // || and \\ represent Ctl and Mem feeds via Proj nodes
1448 // | \ and / indicate further routing of the Ctl and Mem feeds
1449 //
1450 // this is the graph we see for non-object stores. however, for a
1451 // volatile Object store (StoreN/P) we may see other nodes below the
1452 // leading membar because of the need for a GC pre- or post-write
1453 // barrier.
1454 //
1455 // with most GC configurations we will see this simple variant which
1456 // includes a post-write barrier card mark.
1457 //
1458 // MemBarRelease______________________________
1459 // || \\ Ctl \ \\
1460 // || StoreN/P[mo_release] CastP2X StoreB/CM
1461 // | \ / . . . /
1462 // | MergeMem
1463 // | /
1464 // || /
1465 // {MemBarCPUOrder} -- optional
1466 // { || }
1467 // MemBarVolatile
1468 //
1469 // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1470 // the object address to an int used to compute the card offset) and
1471 // Ctl+Mem to a StoreB node (which does the actual card mark).
1472 //
1473 // n.b. a StoreCM node will only appear in this configuration when
1474 // using CMS. StoreCM differs from a normal card mark write (StoreB)
1475 // because it implies a requirement to order visibility of the card
1476 // mark (StoreCM) relative to the object put (StoreP/N) using a
1477 // StoreStore memory barrier (arguably this ought to be represented
1478 // explicitly in the ideal graph but that is not how it works). This
1479 // ordering is required for both non-volatile and volatile
1480 // puts. Normally that means we need to translate a StoreCM using
1481 // the sequence
1482 //
1483 // dmb ishst
1484 // stlrb
1485 //
1486 // However, in the case of a volatile put if we can recognise this
1499 // MemBarRelease____________________________________________
1500 // || \\ Ctl \ Ctl \ \\ Mem \
1501 // || StoreN/P[mo_release] CastP2X If LoadB |
1502 // | \ / \ |
1503 // | MergeMem . . . StoreB
1504 // | / /
1505 // || /
1506 // MemBarVolatile
1507 //
1508 // It is worth noting at this stage that both the above
1509 // configurations can be uniquely identified by checking that the
1510 // memory flow includes the following subgraph:
1511 //
1512 // MemBarRelease
1513 // {MemBarCPUOrder}
1514 // | \ . . .
1515 // | StoreX[mo_release] . . .
1516 // | /
1517 // MergeMem
1518 // |
1519 // {MemBarCPUOrder}
1520 // MemBarVolatile
1521 //
1522 // This is referred to as a *normal* subgraph. It can easily be
1523 // detected starting from any candidate MemBarRelease,
1524 // StoreX[mo_release] or MemBarVolatile.
1525 //
1526 // A simple variation on this normal case occurs for an unsafe CAS
1527 // operation. The basic graph for a non-object CAS is
1528 //
1529 // MemBarRelease
1530 // ||
1531 // MemBarCPUOrder
1532 // || \\ . . .
1533 // || CompareAndSwapX
1534 // || |
1535 // || SCMemProj
1536 // | \ /
1537 // | MergeMem
1538 // | /
1539 // MemBarCPUOrder
1562 //
1563 // This graph can also easily be detected starting from any
1564 // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1565 //
1566 // the code below uses two helper predicates, leading_to_normal and
1567 // normal_to_leading to identify these normal graphs, one validating
1568 // the layout starting from the top membar and searching down and
1569 // the other validating the layout starting from the lower membar
1570 // and searching up.
1571 //
1572 // There are two special case GC configurations when a normal graph
1573 // may not be generated: when using G1 (which always employs a
1574 // conditional card mark); and when using CMS with conditional card
1575 // marking configured. These GCs are both concurrent rather than
1576 // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1577 // graph between the leading and trailing membar nodes, in
1578 // particular enforcing stronger memory serialisation between the
1579 // object put and the corresponding conditional card mark. CMS
1580 // employs a post-write GC barrier while G1 employs both a pre- and
1581 // post-write GC barrier. Of course the extra nodes may be absent --
1582 // they are only inserted for object puts/swaps. This significantly
1583 // complicates the task of identifying whether a MemBarRelease,
1584 // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1585 // when using these GC configurations (see below). It adds similar
1586 // complexity to the task of identifying whether a MemBarRelease,
1587 // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1588 //
1589 // In both cases the post-write subtree includes an auxiliary
1590 // MemBarVolatile (StoreLoad barrier) separating the object put/swap
1591 // and the read of the corresponding card. This poses two additional
1592 // problems.
1593 //
1594 // Firstly, a card mark MemBarVolatile needs to be distinguished
1595 // from a normal trailing MemBarVolatile. Resolving this first
1596 // problem is straightforward: a card mark MemBarVolatile always
1597 // projects a Mem feed to a StoreCM node and that is a unique marker
1598 //
1599 // MemBarVolatile (card mark)
1600 // C | \ . . .
1601 // | StoreCM . . .
1602 // . . .
1603 //
1604 // The second problem is how the code generator is to translate the
1605 // card mark barrier? It always needs to be translated to a "dmb
1606 // ish" instruction whether or not it occurs as part of a volatile
1607 // put. A StoreLoad barrier is needed after the object put to ensure
1608 // i) visibility to GC threads of the object put and ii) visibility
1609 // to the mutator thread of any card clearing write by a GC
1610 // thread. Clearly a normal store (str) will not guarantee this
1611 // ordering but neither will a releasing store (stlr). The latter
1633 // | Bot \ /
1634 // | MergeMem
1635 // | /
1636 // MemBarVolatile (card mark)
1637 // C | || M |
1638 // | LoadB |
1639 // | | |
1640 // | Cmp |\
1641 // | / | \
1642 // If | \
1643 // | \ | \
1644 // IfFalse IfTrue | \
1645 // \ / \ | \
1646 // \ / StoreCM |
1647 // \ / | |
1648 // Region . . . |
1649 // | \ /
1650 // | . . . \ / Bot
1651 // | MergeMem
1652 // | |
1653 // {MemBarCPUOrder}
1654 // MemBarVolatile (trailing)
1655 //
1656 // The first MergeMem merges the AliasIdxBot Mem slice from the
1657 // leading membar and the oopptr Mem slice from the Store into the
1658 // card mark membar. The trailing MergeMem merges the AliasIdxBot
1659 // Mem slice from the card mark membar and the AliasIdxRaw slice
1660 // from the StoreCM into the trailing membar (n.b. the latter
1661 // proceeds via a Phi associated with the If region).
1662 //
1663 // The graph for a CAS varies slightly, the difference being
1664 // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1665 // and the trailing MemBarVolatile by a MemBarCPUOrder +
1666 // MemBarAcquire pair.
1667 //
1668 // MemBarRelease
1669 // MemBarCPUOrder_(leading)_______________
1670 // C | M \ \\ C \
1671 // | \ CompareAndSwapN/P CastP2X
1672 // | \ |
1673 // | \ SCMemProj
1674 // | Bot \ /
1675 // | MergeMem
1676 // | /
1677 // MemBarVolatile (card mark)
1678 // C | || M |
1679 // | LoadB |
1680 // | | |
1681 // | Cmp |\
1682 // | / | \
1683 // If | \
1684 // | \ | \
1685 // IfFalse IfTrue | \
1686 // \ / \ | \
1687 // \ / StoreCM |
1688 // \ / | |
1689 // Region . . . |
1690 // | \ /
1691 // | . . . \ / Bot
1692 // | MergeMem
1693 // | |
1694 // {MemBarCPUOrder}
1695 // MemBarVolatile (trailing)
1696 //
1697 //
1698 // G1 is quite a lot more complicated. The nodes inserted on behalf
1699 // of G1 may comprise: a pre-write graph which adds the old value to
1700 // the SATB queue; the releasing store itself; and, finally, a
1701 // post-write graph which performs a card mark.
1702 //
1703 // The pre-write graph may be omitted, but only when the put is
1704 // writing to a newly allocated (young gen) object and then only if
1705 // there is a direct memory chain to the Initialize node for the
1706 // object allocation. This will not happen for a volatile put since
1707 // any memory chain passes through the leading membar.
1708 //
1709 // The pre-write graph includes a series of 3 If tests. The outermost
1710 // If tests whether SATB is enabled (no else case). The next If tests
1711 // whether the old value is non-NULL (no else case). The third tests
1712 // whether the SATB queue index is > 0, if so updating the queue. The
1713 // else case for this third If calls out to the runtime to allocate a
1714 // new queue buffer.
1715 //
1724 // | \ | \ \
1725 // IfFalse IfTrue | \ \
1726 // | | | \ |
1727 // | If | /\ |
1728 // | | \ |
1729 // | \ |
1730 // | . . . \ |
1731 // | / | / | |
1732 // Region Phi[M] | |
1733 // | \ | | |
1734 // | \_____ | ___ | |
1735 // C | C \ | C \ M | |
1736 // | CastP2X | StoreN/P[mo_release] |
1737 // | | | |
1738 // C | M | M | M |
1739 // \ | | /
1740 // . . .
1741 // (post write subtree elided)
1742 // . . .
1743 // C \ M /
1744 // \ /
1745 // {MemBarCPUOrder}
1746 // MemBarVolatile (trailing)
1747 //
1748 // n.b. the LoadB in this subgraph is not the card read -- it's a
1749 // read of the SATB queue active flag.
1750 //
1751 // The G1 post-write subtree is also optional, this time when the
1752 // new value being written is either null or can be identified as a
1753 // newly allocated (young gen) object with no intervening control
1754 // flow. The latter cannot happen but the former may, in which case
1755 // the card mark membar is omitted and the memory feeds form the
1756 // leading membar and the StoreN/P are merged directly into the
1757 // trailing membar as per the normal subgraph. So, the only special
1758 // case which arises is when the post-write subgraph is generated.
1759 //
1760 // The kernel of the post-write G1 subgraph is the card mark itself
1761 // which includes a card mark memory barrier (MemBarVolatile), a
1762 // card test (LoadB), and a conditional update (If feeding a
1763 // StoreCM). These nodes are surrounded by a series of nested Ifs
1764 // which try to avoid doing the card mark. The top level If skips if
1765 // the object reference does not cross regions (i.e. it tests if
1766 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1767 // need not be recorded. The next If, which skips on a NULL value,
1768 // may be absent (it is not generated if the type of value is >=
1769 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1770 // checking if card_val != young). n.b. although this test requires
1771 // a pre-read of the card it can safely be done before the StoreLoad
1772 // barrier. However that does not bypass the need to reread the card
1773 // after the barrier. A final, 4th If tests if the card is already
1774 // marked.
1775 //
1776 // (pre-write subtree elided)
1777 // . . . . . . . . . . . .
1778 // C | M | M | M |
1779 // Region Phi[M] StoreN |
1780 // | / \ | |
1781 // / \_______ / \ | |
1782 // C / C \ . . . \ | |
1783 // If CastP2X . . . | | |
1784 // / \ | | |
1785 // / \ | | |
1786 // IfFalse IfTrue | | |
1787 // | | | | /|
1788 // | If | | / |
1789 // | / \ | | / |
1790 // | / \ \ | / |
1791 // | IfFalse IfTrue MergeMem |
1792 // | . . . / \ / |
1793 // | / \ / |
1794 // | IfFalse IfTrue / |
1807 // | . . . | | |
1808 // | \ | | /
1809 // | StoreCM | /
1810 // | . . . | /
1811 // | _________/ /
1812 // | / _____________/
1813 // | . . . . . . | / /
1814 // | | | / _________/
1815 // | | Phi[M] / /
1816 // | | | / /
1817 // | | | / /
1818 // | Region . . . Phi[M] _____/
1819 // | / | /
1820 // | | /
1821 // | . . . . . . | /
1822 // | / | /
1823 // Region | | Phi[M]
1824 // | | | / Bot
1825 // \ MergeMem
1826 // \ /
1827 // {MemBarCPUOrder}
1828 // MemBarVolatile
1829 //
1830 // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1831 // from the leading membar and the oopptr Mem slice from the Store
1832 // into the card mark membar i.e. the memory flow to the card mark
1833 // membar still looks like a normal graph.
1834 //
1835 // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1836 // Mem slices (from the StoreCM and other card mark queue stores).
1837 // However in this case the AliasIdxBot Mem slice does not come
1838 // direct from the card mark membar. It is merged through a series
1839 // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1840 // from the leading membar with the Mem feed from the card mark
1841 // membar. Each Phi corresponds to one of the Ifs which may skip
1842 // around the card mark membar. So when the If implementing the NULL
1843 // value check has been elided the total number of Phis is 2
1844 // otherwise it is 3.
1845 //
1846 // The CAS graph when using G1GC also includes a pre-write subgraph
1847 // and an optional post-write subgraph. The same variations are
1848 // introduced as for CMS with conditional card marking i.e. the
1849 // StoreP/N is swapped for a CompareAndSwapP/N with a following
1850 // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
1851 // MemBarAcquire pair. There may be an extra If test introduced in
1852 // the CAS case, when the boolean result of the CAS is tested by the
1853 // caller. In that case an extra Region and AliasIdxBot Phi may be
1854 // introduced before the MergeMem
1855 //
1856 // So, the upshot is that in all cases the subgraph will include a
1857 // *normal* memory subgraph between the leading membar and its child
1858 // membar: either a normal volatile put graph including a releasing
1859 // StoreX and terminating with a trailing volatile membar or card
1860 // mark volatile membar; or a normal CAS graph including a
1861 // CompareAndSwapX + SCMemProj pair and terminating with a card mark
1862 // volatile membar or a trailing cpu order and acquire membar
1863 // pair. If the child membar is not a (volatile) card mark membar
1864 // then it marks the end of the volatile put or CAS subgraph. If the
1865 // child is a card mark membar then the normal subgraph will form
1866 // part of a larger volatile put or CAS subgraph if and only if the
1867 // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
1868 // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
1869 // Phi nodes merging the leading barrier memory flow (for G1).
1870 //
1871 // The predicates controlling generation of instructions for store
1872 // and barrier nodes employ a few simple helper functions (described
1873 // below) which identify the presence or absence of all these
1874 // subgraph configurations and provide a means of traversing from
1875 // one node in the subgraph to another.
1876
1877 // is_CAS(int opcode)
1878 //
1879 // return true if opcode is one of the possible CompareAndSwapX
1880 // values otherwise false.
1881
1882 bool is_CAS(int opcode)
1883 {
1884 switch(opcode) {
1885 // We handle these
1886 case Op_CompareAndSwapI:
1887 case Op_CompareAndSwapL:
1888 case Op_CompareAndSwapP:
1889 case Op_CompareAndSwapN:
1892 return true;
1893 // These are TBD
1894 case Op_WeakCompareAndSwapB:
1895 case Op_WeakCompareAndSwapS:
1896 case Op_WeakCompareAndSwapI:
1897 case Op_WeakCompareAndSwapL:
1898 case Op_WeakCompareAndSwapP:
1899 case Op_WeakCompareAndSwapN:
1900 case Op_CompareAndExchangeB:
1901 case Op_CompareAndExchangeS:
1902 case Op_CompareAndExchangeI:
1903 case Op_CompareAndExchangeL:
1904 case Op_CompareAndExchangeP:
1905 case Op_CompareAndExchangeN:
1906 return false;
1907 default:
1908 return false;
1909 }
1910 }
1911
1912 // helper to determine the maximum number of Phi nodes we may need to
1913 // traverse when searching from a card mark membar for the merge mem
1914 // feeding a trailing membar or vice versa
1915
1916 int max_phis()
1917 {
1918 if (UseG1GC) {
1919 return 4;
1920 } else if (UseConcMarkSweepGC && UseCondCardMark) {
1921 return 1;
1922 } else {
1923 return 0;
1924 }
1925 }
1926
1927 // leading_to_normal
1928 //
1929 // graph traversal helper which detects the normal case Mem feed
1930 // from a release membar (or, optionally, its cpuorder child) to a
1931 // dependent volatile or acquire membar i.e. it ensures that one of
1932 // the following 3 Mem flow subgraphs is present.
1933 //
1934 // MemBarRelease
1935 // MemBarCPUOrder {leading}
1936 // | \ . . .
1937 // | StoreN/P[mo_release] . . .
1938 // | /
1939 // MergeMem
1940 // |
1941 // {MemBarCPUOrder}
1942 // MemBarVolatile {trailing or card mark}
1943 //
1944 // MemBarRelease
1945 // MemBarCPUOrder {leading}
1946 // | \ . . .
1947 // | CompareAndSwapX . . .
1948 // | /
1949 // MergeMem
1950 // |
1951 // MemBarVolatile {card mark}
1952 //
1953 // MemBarRelease
1954 // MemBarCPUOrder {leading}
1955 // | \ . . .
1956 // | CompareAndSwapX . . .
1957 // | /
1958 // MergeMem
1959 // |
1960 // MemBarCPUOrder
1961 // MemBarAcquire {trailing}
1962 //
1963 // if the correct configuration is present returns the trailing
1964 // membar otherwise NULL.
1965 //
1966 // the input membar is expected to be either a cpuorder membar or a
1967 // release membar. in the latter case it should not have a cpu membar
1968 // child.
1969 //
1970 // the returned value may be a card mark or trailing membar
1971 //
1972
1973 MemBarNode *leading_to_normal(MemBarNode *leading)
1974 {
1975 assert((leading->Opcode() == Op_MemBarRelease ||
1976 leading->Opcode() == Op_MemBarCPUOrder),
1977 "expecting a volatile or cpuroder membar!");
1978
1979 // check the mem flow
1998 mm = x->as_MergeMem();
1999 } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2000 // two releasing stores/CAS nodes is one too many
2001 if (st != NULL || cas != NULL) {
2002 return NULL;
2003 }
2004 st = x->as_Store();
2005 } else if (is_CAS(x->Opcode())) {
2006 if (st != NULL || cas != NULL) {
2007 return NULL;
2008 }
2009 cas = x->as_LoadStore();
2010 }
2011 }
2012
2013 // must have a store or a cas
2014 if (!st && !cas) {
2015 return NULL;
2016 }
2017
2018 // must have a merge
2019 if (!mm) {
2020 return NULL;
2021 }
2022
2023 Node *feed = NULL;
2024 if (cas) {
2025 // look for an SCMemProj
2026 for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2027 x = cas->fast_out(i);
2028 if (x->Opcode() == Op_SCMemProj) {
2029 feed = x;
2030 break;
2031 }
2032 }
2033 if (feed == NULL) {
2034 return NULL;
2035 }
2036 } else {
2037 feed = st;
2038 }
2039 // ensure the feed node feeds the existing mergemem;
2040 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2041 x = feed->fast_out(i);
2042 if (x == mm) {
2043 break;
2044 }
2045 }
2046 if (x != mm) {
2047 return NULL;
2048 }
2049
2050 MemBarNode *mbar = NULL;
2051 // ensure the merge feeds to the expected type of membar
2052 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2053 x = mm->fast_out(i);
2054 if (x->is_MemBar()) {
2055 if (x->Opcode() == Op_MemBarCPUOrder) {
2056 // with a store any cpu order membar should precede a
2057 // trailing volatile membar. with a cas it should precede a
2058 // trailing acquire membar. in either case try to skip to
2059 // that next membar
2060 MemBarNode *y = x->as_MemBar();
2061 y = child_membar(y);
2062 if (y != NULL) {
2063 // skip to this new membar to do the check
2064 x = y;
2065 }
2066
2067 }
2068 if (x->Opcode() == Op_MemBarVolatile) {
2069 mbar = x->as_MemBar();
2070 // for a volatile store this can be either a trailing membar
2071 // or a card mark membar. for a cas it must be a card mark
2072 // membar
2073 assert(cas == NULL || is_card_mark_membar(mbar),
2074 "in CAS graph volatile membar must be a card mark");
2075 } else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
2076 mbar = x->as_MemBar();
2077 }
2078 break;
2079 }
2080 }
2081
2082 return mbar;
2083 }
2084
2085 // normal_to_leading
2086 //
2087 // graph traversal helper which detects the normal case Mem feed
2088 // from either a card mark or a trailing membar to a preceding
2089 // release membar (optionally its cpuorder child) i.e. it ensures
2090 // that one of the following 3 Mem flow subgraphs is present.
2091 //
2092 // MemBarRelease
2093 // {MemBarCPUOrder} {leading}
2094 // | \ . . .
2095 // | StoreN/P[mo_release] . . .
2096 // | /
2097 // MergeMem
2098 // |
2099 // {MemBarCPUOrder}
2100 // MemBarVolatile {trailing or card mark}
2101 //
2102 // MemBarRelease
2103 // MemBarCPUOrder {leading}
2104 // | \ . . .
2105 // | CompareAndSwapX . . .
2106 // | /
2107 // MergeMem
2108 // |
2109 // MemBarVolatile {card mark}
2110 //
2111 // MemBarRelease
2112 // MemBarCPUOrder {leading}
2113 // | \ . . .
2114 // | CompareAndSwapX . . .
2115 // | /
2116 // MergeMem
2117 // |
2118 // MemBarCPUOrder
2119 // MemBarAcquire {trailing}
2120 //
2121 // this predicate checks for the same flow as the previous predicate
2122 // but starting from the bottom rather than the top.
2123 //
2124 // if the configuration is present returns the cpuorder membar for
2125 // preference or when absent the release membar otherwise NULL.
2126 //
2127 // n.b. the input membar is expected to be a MemBarVolatile but
2128 // need not be a card mark membar.
2129
2130 MemBarNode *normal_to_leading(const MemBarNode *barrier)
2131 {
2132 // input must be a volatile membar
2133 assert((barrier->Opcode() == Op_MemBarVolatile ||
2134 barrier->Opcode() == Op_MemBarAcquire),
2135 "expecting a volatile or an acquire membar");
2136 bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;
2137
2138 // if we have an intervening cpu order membar then start the
2139 // search from it
2140
2141 Node *x = parent_membar(barrier);
2142
2143 if (x == NULL) {
2144 // stick with the original barrier
2145 x = (Node *)barrier;
2146 } else if (x->Opcode() != Op_MemBarCPUOrder) {
2147 // any other barrier means this is not the graph we want
2148 return NULL;
2149 }
2150
2151 // the Mem feed to the membar should be a merge
2152 x = x ->in(TypeFunc::Memory);
2153 if (!x->is_MergeMem())
2154 return NULL;
2155
2156 MergeMemNode *mm = x->as_MergeMem();
2157
2158 // the merge should get its Bottom mem feed from the leading membar
2159 x = mm->in(Compile::AliasIdxBot);
2160
2161 // ensure this is a non control projection
2162 if (!x->is_Proj() || x->is_CFG()) {
2163 return NULL;
2164 }
2165 // if it is fed by a membar that's the one we want
2166 x = x->in(0);
2167
2168 if (!x->is_MemBar()) {
2169 return NULL;
2170 }
2171
2172 MemBarNode *leading = x->as_MemBar();
2173 // reject invalid candidates
2174 if (!leading_membar(leading)) {
2175 return NULL;
2176 }
2177
2178 // ok, we have a leading membar, now for the sanity clauses
2179
2184 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2185 x = mem->fast_out(i);
2186 if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2187 // two stores or CASes is one too many
2188 if (st != NULL || cas != NULL) {
2189 return NULL;
2190 }
2191 st = x->as_Store();
2192 } else if (is_CAS(x->Opcode())) {
2193 if (st != NULL || cas != NULL) {
2194 return NULL;
2195 }
2196 cas = x->as_LoadStore();
2197 }
2198 }
2199
2200 // we should not have both a store and a cas
2201 if (st == NULL & cas == NULL) {
2202 return NULL;
2203 }
2204 if (st == NULL) {
2205 // if we started from a volatile membar and found a CAS then the
2206 // original membar ought to be for a card mark
2207 assert((barrier_is_acquire || is_card_mark_membar(barrier)),
2208 "unexpected volatile barrier (i.e. not card mark) in CAS graph");
2209 // check that the CAS feeds the merge we used to get here via an
2210 // intermediary SCMemProj
2211 Node *scmemproj = NULL;
2212 for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2213 x = cas->fast_out(i);
2214 if (x->Opcode() == Op_SCMemProj) {
2215 scmemproj = x;
2216 break;
2217 }
2218 }
2219 if (scmemproj == NULL) {
2220 return NULL;
2221 }
2222 for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
2223 x = scmemproj->fast_out(i);
2224 if (x == mm) {
2225 return leading;
2226 }
2227 }
2228 } else {
2229 // we should not have found a store if we started from an acquire
2230 assert(!barrier_is_acquire,
2231 "unexpected trailing acquire barrier in volatile store graph");
2232
2233 // the store should feed the merge we used to get here
2234 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2235 if (st->fast_out(i) == mm) {
2236 return leading;
2237 }
2238 }
2239 }
2240
2241 return NULL;
2242 }
2243
2244 // card_mark_to_trailing
2245 //
2246 // graph traversal helper which detects extra, non-normal Mem feed
2247 // from a card mark volatile membar to a trailing membar i.e. it
2248 // ensures that one of the following three GC post-write Mem flow
2249 // subgraphs is present.
2250 //
2251 // 1)
2252 // . . .
2253 // |
2254 // MemBarVolatile (card mark)
2255 // | |
2256 // | StoreCM
2257 // | |
2258 // | . . .
2259 // Bot | /
2260 // MergeMem
2261 // |
2262 // {MemBarCPUOrder} OR MemBarCPUOrder
2263 // MemBarVolatile {trailing} MemBarAcquire {trailing}
2264 //
2265 //
2266 // 2)
2267 // MemBarRelease/CPUOrder (leading)
2268 // |
2269 // |
2270 // |\ . . .
2271 // | \ |
2272 // | \ MemBarVolatile (card mark)
2273 // | \ | |
2274 // \ \ | StoreCM . . .
2275 // \ \ |
2276 // \ Phi
2277 // \ /
2278 // Phi . . .
2279 // Bot | /
2280 // MergeMem
2281 // |
2282 // {MemBarCPUOrder} OR MemBarCPUOrder
2283 // MemBarVolatile {trailing} MemBarAcquire {trailing}
2284 //
2285 // 3)
2286 // MemBarRelease/CPUOrder (leading)
2287 // |
2288 // |\
2289 // | \
2290 // | \ . . .
2291 // | \ |
2292 // |\ \ MemBarVolatile (card mark)
2293 // | \ \ | |
2294 // | \ \ | StoreCM . . .
2295 // | \ \ |
2296 // \ \ Phi
2297 // \ \ /
2298 // \ Phi
2299 // \ /
2300 // Phi . . .
2301 // Bot | /
2302 // MergeMem
2303 // |
2304 // |
2305 // {MemBarCPUOrder} OR MemBarCPUOrder
2306 // MemBarVolatile {trailing} MemBarAcquire {trailing}
2307 //
2308 // 4)
2309 // MemBarRelease/CPUOrder (leading)
2310 // |
2311 // |\
2312 // | \
2313 // | \
2314 // | \
2315 // |\ \
2316 // | \ \
2317 // | \ \ . . .
2318 // | \ \ |
2319 // |\ \ \ MemBarVolatile (card mark)
2320 // | \ \ \ / |
2321 // | \ \ \ / StoreCM . . .
2322 // | \ \ Phi
2323 // \ \ \ /
2324 // \ \ Phi
2325 // \ \ /
2326 // \ Phi
2327 // \ /
2328 // Phi . . .
2329 // Bot | /
2330 // MergeMem
2331 // |
2332 // |
2333 // MemBarCPUOrder
2334 // MemBarAcquire {trailing}
2335 //
2336 // configuration 1 is only valid if UseConcMarkSweepGC &&
2337 // UseCondCardMark
2338 //
2339 // configuration 2, is only valid if UseConcMarkSweepGC &&
2340 // UseCondCardMark or if UseG1GC
2341 //
2342 // configurations 3 and 4 are only valid if UseG1GC.
2343 //
2344 // if a valid configuration is present returns the trailing membar
2345 // otherwise NULL.
2346 //
2347 // n.b. the supplied membar is expected to be a card mark
2348 // MemBarVolatile i.e. the caller must ensure the input node has the
2349 // correct operand and feeds Mem to a StoreCM node
2350
2351 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2352 {
2353 // input must be a card mark volatile membar
2354 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2355
2356 Node *feed = barrier->proj_out(TypeFunc::Memory);
2357 Node *x;
2358 MergeMemNode *mm = NULL;
2359
2360 const int MAX_PHIS = max_phis(); // max phis we will search through
2361 int phicount = 0; // current search count
2362
2363 bool retry_feed = true;
2364 while (retry_feed) {
2365 // see if we have a direct MergeMem feed
2366 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2367 x = feed->fast_out(i);
2368 // the correct Phi will be merging a Bot memory slice
2369 if (x->is_MergeMem()) {
2370 mm = x->as_MergeMem();
2371 break;
2372 }
2373 }
2374 if (mm) {
2375 retry_feed = false;
2376 } else if (phicount++ < MAX_PHIS) {
2377 // the barrier may feed indirectly via one or two Phi nodes
2378 PhiNode *phi = NULL;
2379 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2380 x = feed->fast_out(i);
2381 // the correct Phi will be merging a Bot memory slice
2382 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2383 phi = x->as_Phi();
2384 break;
2385 }
2386 }
2387 if (!phi) {
2388 return NULL;
2389 }
2390 // look for another merge below this phi
2391 feed = phi;
2392 } else {
2393 // couldn't find a merge
2394 return NULL;
2395 }
2396 }
2397
2398 // sanity check this feed turns up as the expected slice
2399 assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2400
2401 MemBarNode *trailing = NULL;
2402 // be sure we have a trailing membar fed by the merge
2403 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2404 x = mm->fast_out(i);
2405 if (x->is_MemBar()) {
2406 // if this is an intervening cpu order membar skip to the
2407 // following membar
2408 if (x->Opcode() == Op_MemBarCPUOrder) {
2409 MemBarNode *y = x->as_MemBar();
2410 y = child_membar(y);
2411 if (y != NULL) {
2412 x = y;
2413 }
2414 }
2415 if (x->Opcode() == Op_MemBarVolatile ||
2416 x->Opcode() == Op_MemBarAcquire) {
2417 trailing = x->as_MemBar();
2418 }
2419 break;
2420 }
2421 }
2422
2423 return trailing;
2424 }
2425
2426 // trailing_to_card_mark
2427 //
2428 // graph traversal helper which detects extra, non-normal Mem feed
2429 // from a trailing volatile membar to a preceding card mark volatile
2430 // membar i.e. it identifies whether one of the three possible extra
2431 // GC post-write Mem flow subgraphs is present
2432 //
2433 // this predicate checks for the same flow as the previous predicate
2434 // but starting from the bottom rather than the top.
2435 //
2436 // if the configuration is present returns the card mark membar
2437 // otherwise NULL
2438 //
2439 // n.b. the supplied membar is expected to be a trailing
2440 // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
2441 // input node has the correct opcode
2442
2443 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2444 {
2445 assert(trailing->Opcode() == Op_MemBarVolatile ||
2446 trailing->Opcode() == Op_MemBarAcquire,
2447 "expecting a volatile or acquire membar");
2448 assert(!is_card_mark_membar(trailing),
2449 "not expecting a card mark membar");
2450
2451 Node *x = (Node *)trailing;
2452
2453 // look for a preceding cpu order membar
2454 MemBarNode *y = parent_membar(x->as_MemBar());
2455 if (y != NULL) {
2456 // make sure it is a cpu order membar
2457 if (y->Opcode() != Op_MemBarCPUOrder) {
2458       // this is not the graph we were looking for
2459 return NULL;
2460 }
2461 // start the search from here
2462 x = y;
2463 }
2464
2465 // the Mem feed to the membar should be a merge
2466 x = x->in(TypeFunc::Memory);
2467 if (!x->is_MergeMem()) {
2468 return NULL;
2469 }
2470
2471 MergeMemNode *mm = x->as_MergeMem();
2472
2473 x = mm->in(Compile::AliasIdxBot);
2474 // with G1 we may possibly see a Phi or two before we see a Memory
2475 // Proj from the card mark membar
2476
2477 const int MAX_PHIS = max_phis(); // max phis we will search through
2478 int phicount = 0; // current search count
2479
2480 bool retry_feed = !x->is_Proj();
2481
2482 while (retry_feed) {
2483 if (x->is_Phi() && phicount++ < MAX_PHIS) {
2484 PhiNode *phi = x->as_Phi();
2485 ProjNode *proj = NULL;
2486 PhiNode *nextphi = NULL;
2487 bool found_leading = false;
2488 for (uint i = 1; i < phi->req(); i++) {
2489 x = phi->in(i);
2490 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2491 nextphi = x->as_Phi();
2492 } else if (x->is_Proj()) {
2493 int opcode = x->in(0)->Opcode();
2494 if (opcode == Op_MemBarVolatile) {
2495 proj = x->as_Proj();
2496 } else if (opcode == Op_MemBarRelease ||
2497 opcode == Op_MemBarCPUOrder) {
2498 // probably a leading membar
2499 found_leading = true;
2500 }
2501 }
2502 }
2503 // if we found a correct looking proj then retry from there
2504       // otherwise we must see a leading and a phi or this is the
2505 // wrong config
2506 if (proj != NULL) {
2507 x = proj;
2508 retry_feed = false;
2509 } else if (found_leading && nextphi != NULL) {
2510 // retry from this phi to check phi2
2550 //
2551 // n.b. the input membar is expected to be either a volatile or
2552 // acquire membar but in the former case must *not* be a card mark
2553 // membar.
2554
2555 MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2556 {
2557 assert((trailing->Opcode() == Op_MemBarAcquire ||
2558 trailing->Opcode() == Op_MemBarVolatile),
2559 "expecting an acquire or volatile membar");
2560 assert((trailing->Opcode() != Op_MemBarVolatile ||
2561 !is_card_mark_membar(trailing)),
2562 "not expecting a card mark membar");
2563
2564 MemBarNode *leading = normal_to_leading(trailing);
2565
2566 if (leading) {
2567 return leading;
2568 }
2569
2570 // there is no normal path from trailing to leading membar. see if
2571 // we can arrive via a card mark membar
2572
2573 MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2574
2575 if (!card_mark_membar) {
2576 return NULL;
2577 }
2578
2579 return normal_to_leading(card_mark_membar);
2580 }
2581
2582 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2583
2584 bool unnecessary_acquire(const Node *barrier)
2585 {
2586 assert(barrier->is_MemBar(), "expecting a membar");
2587
2588 if (UseBarriersForVolatile) {
2589 // we need to plant a dmb
2590 return false;
2591 }
2592
2593 // a volatile read derived from bytecode (or also from an inlined
2594 // SHA field read via LibraryCallKit::load_field_from_object)
2595 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2596 // with a bogus read dependency on it's preceding load. so in those
2597 // cases we will find the load node at the PARMS offset of the
2598 // acquire membar. n.b. there may be an intervening DecodeN node.
2599
2600 Node *x = barrier->lookup(TypeFunc::Parms);
2601 if (x) {
2602 // we are starting from an acquire and it has a fake dependency
2603 //
2604 // need to check for
2605 //
2606 // LoadX[mo_acquire]
2607 // { |1 }
2608 // {DecodeN}
2609 // |Parms
2610 // MemBarAcquire*
2611 //
2612 // where * tags node we were passed
2613 // and |k means input k
2614 if (x->is_DecodeNarrowPtr()) {
2615 x = x->in(1);
2616 }
2617
2618 return (x->is_Load() && x->as_Load()->is_acquire());
2619 }
2620
2621 // other option for unnecessary membar is that it is a trailing node
2622 // belonging to a CAS
2623
2624 MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2625
2626 return leading != NULL;
2627 }
2628
2629 bool needs_acquiring_load(const Node *n)
2630 {
2631 assert(n->is_Load(), "expecting a load");
2632 if (UseBarriersForVolatile) {
2633 // we use a normal load and a dmb
2634 return false;
2635 }
2636
2637 LoadNode *ld = n->as_Load();
2638
2639 if (!ld->is_acquire()) {
2640 return false;
2641 }
2657 // if we hit a DecodeNarrowPtr we reset the start node and restart
2658 // the search through the outputs
2659 restart:
2660
2661 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2662 Node *x = start->fast_out(i);
2663 if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2664 mbacq = x;
2665 } else if (!mbacq &&
2666 (x->is_DecodeNarrowPtr() ||
2667 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2668 start = x;
2669 goto restart;
2670 }
2671 }
2672
2673 if (mbacq) {
2674 return true;
2675 }
2676
2677 return false;
2678 }
2679
2680 bool unnecessary_release(const Node *n)
2681 {
2682 assert((n->is_MemBar() &&
2683 n->Opcode() == Op_MemBarRelease),
2684 "expecting a release membar");
2685
2686 if (UseBarriersForVolatile) {
2687 // we need to plant a dmb
2688 return false;
2689 }
2690
2691 // if there is a dependent CPUOrder barrier then use that as the
2692 // leading
2693
2694 MemBarNode *barrier = n->as_MemBar();
2695 // check for an intervening cpuorder membar
2696 MemBarNode *b = child_membar(barrier);
2697 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2717 }
2718
2719 bool unnecessary_volatile(const Node *n)
2720 {
2721 // assert n->is_MemBar();
2722 if (UseBarriersForVolatile) {
2723 // we need to plant a dmb
2724 return false;
2725 }
2726
2727 MemBarNode *mbvol = n->as_MemBar();
2728
2729 // first we check if this is part of a card mark. if so then we have
2730 // to generate a StoreLoad barrier
2731
2732 if (is_card_mark_membar(mbvol)) {
2733 return false;
2734 }
2735
2736 // ok, if it's not a card mark then we still need to check if it is
2737 // a trailing membar of a volatile put graph.
2738
2739 return (trailing_to_leading(mbvol) != NULL);
2740 }
2741
2742 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2743
2744 bool needs_releasing_store(const Node *n)
2745 {
2746 // assert n->is_Store();
2747 if (UseBarriersForVolatile) {
2748 // we use a normal store and dmb combination
2749 return false;
2750 }
2751
2752 StoreNode *st = n->as_Store();
2753
2754 // the store must be marked as releasing
2755 if (!st->is_release()) {
2756 return false;
2757 }
2825
2826 x = proj->lookup(0);
2827
2828 assert (x && x->is_MemBar(), "CAS not fed by membar!");
2829
2830 MemBarNode *barrier = x->as_MemBar();
2831
2832   // the barrier must be a cpuorder membar fed by a release membar
2833
2834 assert(barrier->Opcode() == Op_MemBarCPUOrder,
2835 "CAS not fed by cpuorder membar!");
2836
2837 MemBarNode *b = parent_membar(barrier);
2838 assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2839 "CAS not fed by cpuorder+release membar pair!");
2840
2841 // does this lead a normal subgraph?
2842 MemBarNode *mbar = leading_to_normal(barrier);
2843
2844 assert(mbar != NULL, "CAS not embedded in normal graph!");
2845
2846 // if this is a card mark membar check we have a trailing acquire
2847
2848 if (is_card_mark_membar(mbar)) {
2849 mbar = card_mark_to_trailing(mbar);
2850 }
2851
2852 assert(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");
2853
2854 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2855 #endif // ASSERT
2856 // so we can just return true here
2857 return true;
2858 }
2859
2860 // predicate controlling translation of StoreCM
2861 //
2862 // returns true if a StoreStore must precede the card write otherwise
2863 // false
2864
2865 bool unnecessary_storestore(const Node *storecm)
2866 {
2867 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2868
2869 // we only ever need to generate a dmb ishst between an object put
2870 // and the associated card mark when we are using CMS without
2871 // conditional card marking
2872
|