1024
// Size (in bytes) of the code emitted for the exception handler:
// just a single far branch (see MacroAssembler::far_branch_size for
// the exact per-configuration size).
static uint size_exception_handler() {
  return MacroAssembler::far_branch_size();
}
1028
// Size (in bytes) of the code emitted for the deopt handler:
// reserves space for 4 instructions in total.
static uint size_deopt_handler() {
  // count one adr and one far branch instruction
  // (a far branch may expand to up to 3 instructions, hence the
  // 4-instruction budget -- presumably conservative; confirm
  // against MacroAssembler::far_branch)
  return 4 * NativeInstruction::instruction_size;
}
1033 };
1034
1035 // graph traversal helpers
1036
1037 MemBarNode *parent_membar(const Node *n);
1038 MemBarNode *child_membar(const MemBarNode *n);
1039 bool leading_membar(const MemBarNode *barrier);
1040
1041 bool is_card_mark_membar(const MemBarNode *barrier);
1042 bool is_CAS(int opcode);
1043
1044 MemBarNode *leading_to_normal(MemBarNode *leading);
1045 MemBarNode *normal_to_leading(const MemBarNode *barrier);
1046 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1047 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1048 MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1049
1050 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1051
1052 bool unnecessary_acquire(const Node *barrier);
1053 bool needs_acquiring_load(const Node *load);
1054
1055 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1056
1057 bool unnecessary_release(const Node *barrier);
1058 bool unnecessary_volatile(const Node *barrier);
1059 bool needs_releasing_store(const Node *store);
1060
1061 // predicate controlling translation of CompareAndSwapX
1062 bool needs_acquiring_load_exclusive(const Node *load);
1063
1064 // predicate controlling translation of StoreCM
1065 bool unnecessary_storestore(const Node *storecm);
1066 %}
1067
1405
1406 return false;
1407 }
1408
1409
1410 // 3) helper predicates to traverse volatile put or CAS graphs which
1411 // may contain GC barrier subgraphs
1412
1413 // Preamble
1414 // --------
1415 //
1416 // for volatile writes we can omit generating barriers and employ a
1417 // releasing store when we see a node sequence with a
1418 // leading MemBarRelease and a trailing MemBarVolatile as follows
1419 //
1420 // MemBarRelease
1421 // { || } -- optional
1422 // {MemBarCPUOrder}
1423 // || \\
1424 // || StoreX[mo_release]
1425 // | \ /
1426 // | MergeMem
1427 // | /
1428 // MemBarVolatile
1429 //
1430 // where
1431 // || and \\ represent Ctl and Mem feeds via Proj nodes
1432 // | \ and / indicate further routing of the Ctl and Mem feeds
1433 //
1434 // this is the graph we see for non-object stores. however, for a
1435 // volatile Object store (StoreN/P) we may see other nodes below the
1436 // leading membar because of the need for a GC pre- or post-write
1437 // barrier.
1438 //
1439 // with most GC configurations we will see this simple variant which
1440 // includes a post-write barrier card mark.
1441 //
1442 // MemBarRelease______________________________
1443 // || \\ Ctl \ \\
1444 // || StoreN/P[mo_release] CastP2X StoreB/CM
1445 // | \ / . . . /
1446 // | MergeMem
1447 // | /
1448 // || /
1449 // MemBarVolatile
1450 //
1451 // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1452 // the object address to an int used to compute the card offset) and
1453 // Ctl+Mem to a StoreB node (which does the actual card mark).
1454 //
1455 // n.b. a StoreCM node will only appear in this configuration when
1456 // using CMS. StoreCM differs from a normal card mark write (StoreB)
1457 // because it implies a requirement to order visibility of the card
1458 // mark (StoreCM) relative to the object put (StoreP/N) using a
1459 // StoreStore memory barrier (arguably this ought to be represented
1460 // explicitly in the ideal graph but that is not how it works). This
1461 // ordering is required for both non-volatile and volatile
1462 // puts. Normally that means we need to translate a StoreCM using
1463 // the sequence
1464 //
1465 // dmb ishst
1466 // stlrb
1467 //
1468 // However, in the case of a volatile put if we can recognise this
1469 // configuration and plant an stlr for the object write then we can
1470 // omit the dmb and just plant an strb since visibility of the stlr
1471 // is ordered before visibility of subsequent stores. StoreCM nodes
1472 // also arise when using G1 or using CMS with conditional card
1473 // marking. In these cases (as we shall see) we don't need to insert
1474 // the dmb when translating StoreCM because there is already an
1475 // intervening StoreLoad barrier between it and the StoreP/N.
1476 //
1477 // It is also possible to perform the card mark conditionally on it
1478 // currently being unmarked in which case the volatile put graph
1479 // will look slightly different
1480 //
1481 // MemBarRelease____________________________________________
1482 // || \\ Ctl \ Ctl \ \\ Mem \
1483 // || StoreN/P[mo_release] CastP2X If LoadB |
1484 // | \ / \ |
1485 // | MergeMem . . . StoreB
1486 // | / /
1487 // || /
1488 // MemBarVolatile
1489 //
1490 // It is worth noting at this stage that both the above
1491 // configurations can be uniquely identified by checking that the
1492 // memory flow includes the following subgraph:
1493 //
1494 // MemBarRelease
1495 // {MemBarCPUOrder}
1496 // | \ . . .
1497 // | StoreX[mo_release] . . .
1498 // | /
1499 // MergeMem
1500 // |
1501 // MemBarVolatile
1502 //
1503 // This is referred to as a *normal* subgraph. It can easily be
1504 // detected starting from any candidate MemBarRelease,
1505 // StoreX[mo_release] or MemBarVolatile.
1506 //
1507 // A simple variation on this normal case occurs for an unsafe CAS
1508 // operation. The basic graph for a non-object CAS is
1509 //
1510 // MemBarRelease
1511 // ||
1512 // MemBarCPUOrder
1513 // || \\ . . .
1514 // || CompareAndSwapX
1515 // || |
1516 // || SCMemProj
1517 // | \ /
1518 // | MergeMem
1519 // | /
1520 // MemBarCPUOrder
1521 // ||
1522 // MemBarAcquire
1523 //
1524 // The same basic variations on this arrangement (mutatis mutandis)
1525 // occur when a card mark is introduced. i.e. we see the same basic
1526 // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527 // tail of the graph is a pair comprising a MemBarCPUOrder +
1528 // MemBarAcquire.
1529 //
1530 // So, in the case of a CAS the normal graph has the variant form
1531 //
1532 // MemBarRelease
1533 // MemBarCPUOrder
1534 // | \ . . .
1535 // | CompareAndSwapX . . .
1536 // | |
1537 // | SCMemProj
1538 // | / . . .
1539 // MergeMem
1540 // |
1541 // MemBarCPUOrder
1542 // MemBarAcquire
1543 //
1544 // This graph can also easily be detected starting from any
1545 // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546 //
1547 // the code below uses two helper predicates, leading_to_normal and
1548 // normal_to_leading to identify these normal graphs, one validating
1549 // the layout starting from the top membar and searching down and
1550 // the other validating the layout starting from the lower membar
1551 // and searching up.
1552 //
1553 // There are two special case GC configurations when a normal graph
1554 // may not be generated: when using G1 (which always employs a
1555 // conditional card mark); and when using CMS with conditional card
1556 // marking configured. These GCs are both concurrent rather than
1557 // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1558 // graph between the leading and trailing membar nodes, in
1559 // particular enforcing stronger memory serialisation between the
1560 // object put and the corresponding conditional card mark. CMS
1561 // employs a post-write GC barrier while G1 employs both a pre- and
1562 // post-write GC barrier. Of course the extra nodes may be absent --
1563 // they are only inserted for object puts. This significantly
1564 // complicates the task of identifying whether a MemBarRelease,
1565 // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566 // when using these GC configurations (see below). It adds similar
1567 // complexity to the task of identifying whether a MemBarRelease,
1568 // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569 //
1570 // In both cases the post-write subtree includes an auxiliary
1571 // MemBarVolatile (StoreLoad barrier) separating the object put and
1572 // the read of the corresponding card. This poses two additional
1573 // problems.
1574 //
1575 // Firstly, a card mark MemBarVolatile needs to be distinguished
1576 // from a normal trailing MemBarVolatile. Resolving this first
1577 // problem is straightforward: a card mark MemBarVolatile always
1578 // projects a Mem feed to a StoreCM node and that is a unique marker
1579 //
1580 // MemBarVolatile (card mark)
1581 // C | \ . . .
1582 // | StoreCM . . .
1583 // . . .
1584 //
1585 // The second problem is how the code generator is to translate the
1586 // card mark barrier? It always needs to be translated to a "dmb
1587 // ish" instruction whether or not it occurs as part of a volatile
1588 // put. A StoreLoad barrier is needed after the object put to ensure
1589 // i) visibility to GC threads of the object put and ii) visibility
1590 // to the mutator thread of any card clearing write by a GC
1591 // thread. Clearly a normal store (str) will not guarantee this
1592 // ordering but neither will a releasing store (stlr). The latter
1593 // guarantees that the object put is visible but does not guarantee
1594 // that writes by other threads have also been observed.
1595 //
1596 // So, returning to the task of translating the object put and the
1597 // leading/trailing membar nodes: what do the non-normal node graph
1598 // look like for these 2 special cases? and how can we determine the
1599 // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600 // in both normal and non-normal cases?
1601 //
1602 // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1603 // which selects conditional execution based on the value loaded
1604 // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605 // intervening StoreLoad barrier (MemBarVolatile).
1606 //
1607 // So, with CMS we may see a node graph for a volatile object store
1608 // which looks like this
1609 //
1610 // MemBarRelease
1611 // MemBarCPUOrder_(leading)__________________
1612 // C | M \ \\ C \
1613 // | \ StoreN/P[mo_release] CastP2X
1614 // | Bot \ /
1615 // | MergeMem
1616 // | /
1617 // MemBarVolatile (card mark)
1618 // C | || M |
1619 // | LoadB |
1620 // | | |
1621 // | Cmp |\
1622 // | / | \
1623 // If | \
1624 // | \ | \
1625 // IfFalse IfTrue | \
1626 // \ / \ | \
1627 // \ / StoreCM |
1628 // \ / | |
1629 // Region . . . |
1630 // | \ /
1631 // | . . . \ / Bot
1632 // | MergeMem
1633 // | |
1634 // MemBarVolatile (trailing)
1635 //
1636 // The first MergeMem merges the AliasIdxBot Mem slice from the
1637 // leading membar and the oopptr Mem slice from the Store into the
1638 // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639 // Mem slice from the card mark membar and the AliasIdxRaw slice
1640 // from the StoreCM into the trailing membar (n.b. the latter
1641 // proceeds via a Phi associated with the If region).
1642 //
1643 // The graph for a CAS varies slightly, the obvious difference being
1644 // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645 // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646 // MemBarAcquire pair. The other important difference is that the
1647 // CompareAndSwap node's SCMemProj is not merged into the card mark
1648 // membar - it still feeds the trailing MergeMem. This also means
1649 // that the card mark membar receives its Mem feed directly from the
1650 // leading membar rather than via a MergeMem.
1651 //
1652 // MemBarRelease
1653 // MemBarCPUOrder__(leading)_________________________
1654 // || \\ C \
1655 // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X
1656 // C | || M | |
1657 // | LoadB | ______/|
1658 // | | | / |
1659 // | Cmp | / SCMemProj
1660 // | / | / |
1661 // If | / /
1662 // | \ | / /
1663 // IfFalse IfTrue | / /
1664 // \ / \ |/ prec /
1665 // \ / StoreCM /
1666 // \ / | /
1667 // Region . . . /
1668 // | \ /
1669 // | . . . \ / Bot
1670 // | MergeMem
1671 // | |
1672 // MemBarCPUOrder
1673 // MemBarAcquire (trailing)
1674 //
1675 // This has a slightly different memory subgraph to the one seen
1676 // previously but the core of it is the same as for the CAS normal
1677 // subgraph
1678 //
1679 // MemBarRelease
1680 // MemBarCPUOrder____
1681 // || \ . . .
1682 // MemBarVolatile CompareAndSwapX . . .
1683 // | \ |
1684 // . . . SCMemProj
1685 // | / . . .
1686 // MergeMem
1687 // |
1688 // MemBarCPUOrder
1689 // MemBarAcquire
1690 //
1691 //
1692 // G1 is quite a lot more complicated. The nodes inserted on behalf
1693 // of G1 may comprise: a pre-write graph which adds the old value to
1694 // the SATB queue; the releasing store itself; and, finally, a
1695 // post-write graph which performs a card mark.
1696 //
1697 // The pre-write graph may be omitted, but only when the put is
1698 // writing to a newly allocated (young gen) object and then only if
1699 // there is a direct memory chain to the Initialize node for the
1700 // object allocation. This will not happen for a volatile put since
1701 // any memory chain passes through the leading membar.
1702 //
1703 // The pre-write graph includes a series of 3 If tests. The outermost
1704 // If tests whether SATB is enabled (no else case). The next If tests
1705 // whether the old value is non-NULL (no else case). The third tests
1706 // whether the SATB queue index is > 0, if so updating the queue. The
1707 // else case for this third If calls out to the runtime to allocate a
1708 // new queue buffer.
1709 //
1710 // So with G1 the pre-write and releasing store subgraph looks like
1711 // this (the nested Ifs are omitted).
1712 //
1713 // MemBarRelease (leading)____________
1714 // C | || M \ M \ M \ M \ . . .
1715 // | LoadB \ LoadL LoadN \
1716 // | / \ \
1717 // If |\ \
1718 // | \ | \ \
1719 // IfFalse IfTrue | \ \
1720 // | | | \ |
1721 // | If | /\ |
1722 // | | \ |
1723 // | \ |
1724 // | . . . \ |
1725 // | / | / | |
1726 // Region Phi[M] | |
1727 // | \ | | |
1728 // | \_____ | ___ | |
1729 // C | C \ | C \ M | |
1730 // | CastP2X | StoreN/P[mo_release] |
1731 // | | | |
1732 // C | M | M | M |
1733 // \ | | /
1734 // . . .
1735 // (post write subtree elided)
1736 // . . .
1737 // C \ M /
1738 // MemBarVolatile (trailing)
1739 //
1740 // n.b. the LoadB in this subgraph is not the card read -- it's a
1741 // read of the SATB queue active flag.
1742 //
1743 // Once again the CAS graph is a minor variant on the above with the
1744 // expected substitutions of CompareAndSwapX for StoreN/P and
1745 // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746 //
1747 // The G1 post-write subtree is also optional, this time when the
1748 // new value being written is either null or can be identified as a
1749 // newly allocated (young gen) object with no intervening control
1750 // flow. The latter cannot happen but the former may, in which case
1751 // the card mark membar is omitted and the memory feeds from the
1752 // leading membar and the StoreN/P are merged direct into the
1753 // trailing membar as per the normal subgraph. So, the only special
1754 // case which arises is when the post-write subgraph is generated.
1755 //
1756 // The kernel of the post-write G1 subgraph is the card mark itself
1757 // which includes a card mark memory barrier (MemBarVolatile), a
1758 // card test (LoadB), and a conditional update (If feeding a
1759 // StoreCM). These nodes are surrounded by a series of nested Ifs
1760 // which try to avoid doing the card mark. The top level If skips if
1761 // the object reference does not cross regions (i.e. it tests if
1762 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763 // need not be recorded. The next If, which skips on a NULL value,
1764 // may be absent (it is not generated if the type of value is >=
1765 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766 // checking if card_val != young). n.b. although this test requires
1767 // a pre-read of the card it can safely be done before the StoreLoad
1768 // barrier. However that does not bypass the need to reread the card
1769 // after the barrier.
1770 //
1771 // (pre-write subtree elided)
1772 // . . . . . . . . . . . .
1773 // C | M | M | M |
1774 // Region Phi[M] StoreN |
1775 // | / \ | |
1776 // / \_______ / \ | |
1777 // C / C \ . . . \ | |
1778 // If CastP2X . . . | | |
1779 // / \ | | |
1780 // / \ | | |
1781 // IfFalse IfTrue | | |
1782 // | | | | /|
1783 // | If | | / |
1784 // | / \ | | / |
1785 // | / \ \ | / |
1786 // | IfFalse IfTrue MergeMem |
1787 // | . . . / \ / |
1788 // | / \ / |
1789 // | IfFalse IfTrue / |
1790 // | . . . | / |
1791 // | If / |
1792 // | / \ / |
1793 // | / \ / |
1794 // | IfFalse IfTrue / |
1795 // | . . . | / |
1796 // | \ / |
1797 // | \ / |
1798 // | MemBarVolatile__(card mark) |
1799 // | || C | M \ M \ |
1800 // | LoadB If | | |
1801 // | / \ | | |
1802 // | . . . | | |
1803 // | \ | | /
1804 // | StoreCM | /
1805 // | . . . | /
1806 // | _________/ /
1807 // | / _____________/
1808 // | . . . . . . | / /
1809 // | | | / _________/
1810 // | | Phi[M] / /
1811 // | | | / /
1812 // | | | / /
1813 // | Region . . . Phi[M] _____/
1814 // | / | /
1815 // | | /
1816 // | . . . . . . | /
1817 // | / | /
1818 // Region | | Phi[M]
1819 // | | | / Bot
1820 // \ MergeMem
1821 // \ /
1822 // MemBarVolatile
1823 //
1824 // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825 // from the leading membar and the oopptr Mem slice from the Store
1826 // into the card mark membar i.e. the memory flow to the card mark
1827 // membar still looks like a normal graph.
1828 //
1829 // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830 // Mem slices (from the StoreCM and other card mark queue stores).
1831 // However in this case the AliasIdxBot Mem slice does not come
1832 // direct from the card mark membar. It is merged through a series
1833 // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834 // from the leading membar with the Mem feed from the card mark
1835 // membar. Each Phi corresponds to one of the Ifs which may skip
1836 // around the card mark membar. So when the If implementing the NULL
1837 // value check has been elided the total number of Phis is 2
1838 // otherwise it is 3.
1839 //
1840 // The CAS graph when using G1GC also includes a pre-write subgraph
1841 // and an optional post-write subgraph. The same variations are
1842 // introduced as for CMS with conditional card marking i.e. the
1843 // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844 // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845 // Mem feed from the CompareAndSwapP/N includes a precedence
1846 // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847 // trailing membar. So, as before the configuration includes the
1848 // normal CAS graph as a subgraph of the memory flow.
1849 //
1850 // So, the upshot is that in all cases the volatile put graph will
1851 // include a *normal* memory subgraph between the leading membar and
1852 // its child membar, either a volatile put graph (including a
1853 // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854 // When that child is not a card mark membar then it marks the end
1855 // of the volatile put or CAS subgraph. If the child is a card mark
1856 // membar then the normal subgraph will form part of a volatile put
1857 // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858 // to a trailing barrier via a MergeMem. That feed is either direct
1859 // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860 // memory flow (for G1).
1861 //
1862 // The predicates controlling generation of instructions for store
1863 // and barrier nodes employ a few simple helper functions (described
1864 // below) which identify the presence or absence of all these
1865 // subgraph configurations and provide a means of traversing from
1866 // one node in the subgraph to another.
1867
1868 // is_CAS(int opcode)
1869 //
1870 // return true if opcode is one of the possible CompareAndSwapX
1871 // values otherwise false.
1872
1873 bool is_CAS(int opcode)
1874 {
1875 return (opcode == Op_CompareAndSwapI ||
1876 opcode == Op_CompareAndSwapL ||
1877 opcode == Op_CompareAndSwapN ||
1878 opcode == Op_CompareAndSwapP);
1879 }
1880
1881 // leading_to_normal
1882 //
1883 // graph traversal helper which detects the normal case Mem feed from
1884 // a release membar (or, optionally, its cpuorder child) to a
1885 // dependent volatile membar i.e. it ensures that one or other of
1886 // the following Mem flow subgraph is present.
1887 //
1888 // MemBarRelease
1889 // MemBarCPUOrder {leading}
1890 // | \ . . .
1891 // | StoreN/P[mo_release] . . .
1892 // | /
1893 // MergeMem
1894 // |
1895 // MemBarVolatile {trailing or card mark}
1896 //
1897 // MemBarRelease
1898 // MemBarCPUOrder {leading}
1899 // | \ . . .
1900 // | CompareAndSwapX . . .
1901 // |
1902 // . . . SCMemProj
1903 // \ |
1904 // | MergeMem
1905 // | /
1906 // MemBarCPUOrder
1907 // MemBarAcquire {trailing}
1908 //
1909 // if the correct configuration is present returns the trailing
1910 // membar otherwise NULL.
1911 //
1912 // the input membar is expected to be either a cpuorder membar or a
1913 // release membar. in the latter case it should not have a cpu membar
1914 // child.
1915 //
1916 // the returned value may be a card mark or trailing membar
1917 //
1918
MemBarNode *leading_to_normal(MemBarNode *leading)
{
  assert((leading->Opcode() == Op_MemBarRelease ||
          leading->Opcode() == Op_MemBarCPUOrder),
         "expecting a volatile or cpuroder membar!");

  // check the mem flow
  ProjNode *mem = leading->proj_out(TypeFunc::Memory);

  if (!mem) {
    return NULL;
  }

  Node *x = NULL;
  StoreNode * st = NULL;
  LoadStoreNode *cas = NULL;
  MergeMemNode *mm = NULL;

  // scan the memory uses of the leading membar looking for the
  // (unique) MergeMem and the (unique) releasing store or CAS node
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x->is_MergeMem()) {
      if (mm != NULL) {
        return NULL;
      }
      // two merge mems is one too many
      mm = x->as_MergeMem();
    } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two releasing stores/CAS nodes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // must have a store or a cas
  if (!st && !cas) {
    return NULL;
  }

  // must have a merge if we also have st
  if (st && !mm) {
    return NULL;
  }

  Node *y = NULL;
  if (cas) {
    // look for an SCMemProj
    // (n.b. this accepts the first Proj output -- presumably the
    // only Proj a CAS feeds is its SCMemProj; confirm)
    for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
      x = cas->fast_out(i);
      if (x->is_Proj()) {
        y = x;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    // the proj must feed a MergeMem
    for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
      x = y->fast_out(i);
      if (x->is_MergeMem()) {
        mm = x->as_MergeMem();
        break;
      }
    }
    if (mm == NULL)
      return NULL;
  } else {
    // ensure the store feeds the existing mergemem;
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm) {
        y = st;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
  }

  MemBarNode *mbar = NULL;
  // ensure the merge feeds to the expected type of membar:
  // a store expects a trailing (or card mark) MemBarVolatile while a
  // CAS expects a MemBarCPUOrder whose child is a MemBarAcquire
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x->is_MemBar()) {
      int opcode = x->Opcode();
      if (opcode == Op_MemBarVolatile && st) {
        mbar = x->as_MemBar();
      } else if (cas && opcode == Op_MemBarCPUOrder) {
        MemBarNode *y = x->as_MemBar();
        y = child_membar(y);
        if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
          // return the trailing MemBarAcquire, not the cpuorder membar
          mbar = y;
        }
      }
      // only the first membar use is considered
      break;
    }
  }

  return mbar;
}
2026
2027 // normal_to_leading
2028 //
2029 // graph traversal helper which detects the normal case Mem feed
2030 // from either a card mark or a trailing membar to a preceding
2031 // release membar (optionally its cpuorder child) i.e. it ensures
2032 // that one or other of the following Mem flow subgraphs is present.
2033 //
2034 // MemBarRelease
2035 // MemBarCPUOrder {leading}
2036 // | \ . . .
2037 // | StoreN/P[mo_release] . . .
2038 // | /
2039 // MergeMem
2040 // |
2041 // MemBarVolatile {card mark or trailing}
2042 //
2043 // MemBarRelease
2044 // MemBarCPUOrder {leading}
2045 // | \ . . .
2046 // | CompareAndSwapX . . .
2047 // |
2048 // . . . SCMemProj
2049 // \ |
2050 // | MergeMem
2051 // | /
2052 // MemBarCPUOrder
2053 // MemBarAcquire {trailing}
2054 //
2055 // this predicate checks for the same flow as the previous predicate
2056 // but starting from the bottom rather than the top.
2057 //
2058 // if the configuration is present returns the cpuorder membar for
2059 // preference or when absent the release membar otherwise NULL.
2060 //
2061 // n.b. the input membar is expected to be a MemBarVolatile but
2062 // need not be a card mark membar.
2063
2064 MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065 {
2066 // input must be a volatile membar
2067 assert((barrier->Opcode() == Op_MemBarVolatile ||
2068 barrier->Opcode() == Op_MemBarAcquire),
2069 "expecting a volatile or an acquire membar");
2070 Node *x;
2071 bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072
2073 // if we have an acquire membar then it must be fed via a CPUOrder
2074 // membar
2075
2076 if (is_cas) {
2077 // skip to parent barrier which must be a cpuorder
2078 x = parent_membar(barrier);
2079 if (x->Opcode() != Op_MemBarCPUOrder)
2080 return NULL;
2081 } else {
2082 // start from the supplied barrier
2083 x = (Node *)barrier;
2084 }
2085
2086 // the Mem feed to the membar should be a merge
2087 x = x ->in(TypeFunc::Memory);
2088 if (!x->is_MergeMem())
2089 return NULL;
2162 if (st == NULL) {
2163 // nothing more to check
2164 return leading;
2165 } else {
2166 // we should not have a store if we started from an acquire
2167 if (is_cas) {
2168 return NULL;
2169 }
2170
2171 // the store should feed the merge we used to get here
2172 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173 if (st->fast_out(i) == mm) {
2174 return leading;
2175 }
2176 }
2177 }
2178
2179 return NULL;
2180 }
2181
2182 // card_mark_to_trailing
2183 //
2184 // graph traversal helper which detects extra, non-normal Mem feed
2185 // from a card mark volatile membar to a trailing membar i.e. it
2186 // ensures that one of the following three GC post-write Mem flow
2187 // subgraphs is present.
2188 //
2189 // 1)
2190 // . . .
2191 // |
2192 // MemBarVolatile (card mark)
2193 // | |
2194 // | StoreCM
2195 // | |
2196 // | . . .
2197 // Bot | /
2198 // MergeMem
2199 // |
2200 // |
2201 // MemBarVolatile {trailing}
2202 //
2203 // 2)
2204 // MemBarRelease/CPUOrder (leading)
2205 // |
2206 // |
2207 // |\ . . .
2208 // | \ |
2209 // | \ MemBarVolatile (card mark)
2210 // | \ | |
2211 // \ \ | StoreCM . . .
2212 // \ \ |
2213 // \ Phi
2214 // \ /
2215 // Phi . . .
2216 // Bot | /
2217 // MergeMem
2218 // |
2219 // MemBarVolatile {trailing}
2220 //
2221 //
2222 // 3)
2223 // MemBarRelease/CPUOrder (leading)
2224 // |
2225 // |\
2226 // | \
2227 // | \ . . .
2228 // | \ |
2229 // |\ \ MemBarVolatile (card mark)
2230 // | \ \ | |
2231 // | \ \ | StoreCM . . .
2232 // | \ \ |
2233 // \ \ Phi
2234 // \ \ /
2235 // \ Phi
2236 // \ /
2237 // Phi . . .
2238 // Bot | /
2239 // MergeMem
2240 // |
2241 // |
2242 // MemBarVolatile {trailing}
2243 //
2244 // configuration 1 is only valid if UseConcMarkSweepGC &&
2245 // UseCondCardMark
2246 //
2247 // configurations 2 and 3 are only valid if UseG1GC.
2248 //
2249 // if a valid configuration is present returns the trailing membar
2250 // otherwise NULL.
2251 //
2252 // n.b. the supplied membar is expected to be a card mark
2253 // MemBarVolatile i.e. the caller must ensure the input node has the
2254 // correct operand and feeds Mem to a StoreCM node
2255
2256 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257 {
2258 // input must be a card mark volatile membar
2259 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260
2261 Node *feed = barrier->proj_out(TypeFunc::Memory);
2262 Node *x;
2263 MergeMemNode *mm = NULL;
2264
2265 const int MAX_PHIS = 3; // max phis we will search through
2266 int phicount = 0; // current search count
2267
2268 bool retry_feed = true;
2269 while (retry_feed) {
2270 // see if we have a direct MergeMem feed
2271 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2272 x = feed->fast_out(i);
2273 // the correct Phi will be merging a Bot memory slice
2274 if (x->is_MergeMem()) {
2275 mm = x->as_MergeMem();
2276 break;
2277 }
2278 }
2279 if (mm) {
2280 retry_feed = false;
2281 } else if (UseG1GC & phicount++ < MAX_PHIS) {
2282 // the barrier may feed indirectly via one or two Phi nodes
2283 PhiNode *phi = NULL;
2284 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285 x = feed->fast_out(i);
2286 // the correct Phi will be merging a Bot memory slice
2287 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288 phi = x->as_Phi();
2289 break;
2290 }
2291 }
2292 if (!phi) {
2293 return NULL;
2294 }
2295 // look for another merge below this phi
2296 feed = phi;
2297 } else {
2298 // couldn't find a merge
2299 return NULL;
2300 }
2301 }
2302
2303 // sanity check this feed turns up as the expected slice
2304 assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305
2306 MemBarNode *trailing = NULL;
2307 // be sure we have a trailing membar the merge
2308 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309 x = mm->fast_out(i);
2310 if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311 trailing = x->as_MemBar();
2312 break;
2313 }
2314 }
2315
2316 return trailing;
2317 }
2318
2319 // trailing_to_card_mark
2320 //
2321 // graph traversal helper which detects extra, non-normal Mem feed
2322 // from a trailing volatile membar to a preceding card mark volatile
2323 // membar i.e. it identifies whether one of the three possible extra
2324 // GC post-write Mem flow subgraphs is present
2325 //
2326 // this predicate checks for the same flow as the previous predicate
2327 // but starting from the bottom rather than the top.
2328 //
2329 // if the configuration is present returns the card mark membar
2330 // otherwise NULL
2331 //
2332 // n.b. the supplied membar is expected to be a trailing
2333 // MemBarVolatile i.e. the caller must ensure the input node has the
2334 // correct opcode
2335
MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
{
  assert(trailing->Opcode() == Op_MemBarVolatile,
         "expecting a volatile membar");
  assert(!is_card_mark_membar(trailing),
         "not expecting a card mark membar");

  // the Mem feed to the membar should be a merge
  Node *x = trailing->in(TypeFunc::Memory);
  if (!x->is_MergeMem()) {
    return NULL;
  }

  MergeMemNode *mm = x->as_MergeMem();

  // follow the merge's AliasIdxBot slice back towards the card mark
  x = mm->in(Compile::AliasIdxBot);
  // with G1 we may possibly see a Phi or two before we see a Memory
  // Proj from the card mark membar

  const int MAX_PHIS = 3;       // max phis we will search through
  int phicount = 0;             // current search count

  // no retry needed if the slice is already fed by a Proj
  bool retry_feed = !x->is_Proj();

  while (retry_feed) {
    if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
      PhiNode *phi = x->as_Phi();
      ProjNode *proj = NULL;
      PhiNode *nextphi = NULL;
      bool found_leading = false;
      // scan the phi's inputs for a Proj from a MemBarVolatile (the
      // card mark candidate), a further Phi to descend into and/or a
      // Proj from a leading membar
      for (uint i = 1; i < phi->req(); i++) {
        x = phi->in(i);
        if (x->is_Phi()) {
          nextphi = x->as_Phi();
        } else if (x->is_Proj()) {
          int opcode = x->in(0)->Opcode();
          if (opcode == Op_MemBarVolatile) {
            proj = x->as_Proj();
          } else if (opcode == Op_MemBarRelease ||
                     opcode == Op_MemBarCPUOrder) {
            // probably a leading membar
            found_leading = true;
          }
        }
      }
      // if we found a correct looking proj then retry from there
      // otherwise we must see a leading and a phi or this is the
      // wrong config
      if (proj != NULL) {
        x = proj;
        retry_feed = false;
      } else if (found_leading && nextphi != NULL) {
        // retry from this phi to check phi2
        x = nextphi;
      } else {
        // not what we were looking for
        return NULL;
      }
    } else {
      return NULL;
    }
  }
  // the proj has to come from the card mark membar
  x = x->in(0);
  if (!x->is_MemBar()) {
    return NULL;
  }

  MemBarNode *card_mark_membar = x->as_MemBar();

  if (!is_card_mark_membar(card_mark_membar)) {
    return NULL;
  }

  return card_mark_membar;
}
2412
2413 // trailing_to_leading
2414 //
2415 // graph traversal helper which checks the Mem flow up the graph
2416 // from a (non-card mark) trailing membar attempting to locate and
2417 // return an associated leading membar. it first looks for a
2418 // subgraph in the normal configuration (relying on helper
2419 // normal_to_leading). failing that it then looks for one of the
2420 // possible post-write card mark subgraphs linking the trailing node
2421 // to a the card mark membar (relying on helper
2422 // trailing_to_card_mark), and then checks that the card mark membar
2423 // is fed by a leading membar (once again relying on auxiliary
2424 // predicate normal_to_leading).
2425 //
2426 // if the configuration is valid returns the cpuorder member for
2427 // preference or when absent the release membar otherwise NULL.
2428 //
2429 // n.b. the input membar is expected to be either a volatile or
2430 // acquire membar but in the former case must *not* be a card mark
2431 // membar.
2432
2433 MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434 {
// caller must pass a trailing MemBarAcquire or a non-card-mark
// trailing MemBarVolatile
2435 assert((trailing->Opcode() == Op_MemBarAcquire ||
2436 trailing->Opcode() == Op_MemBarVolatile),
2437 "expecting an acquire or volatile membar");
2438 assert((trailing->Opcode() != Op_MemBarVolatile ||
2439 !is_card_mark_membar(trailing)),
2440 "not expecting a card mark membar");
2441
// first try the simple, normal subgraph configuration
2442 MemBarNode *leading = normal_to_leading(trailing);
2443
2444 if (leading) {
2445 return leading;
2446 }
2447
2448 // nothing more to do if this is an acquire
2449 if (trailing->Opcode() == Op_MemBarAcquire) {
2450 return NULL;
2451 }
2452
// otherwise look for one of the extra GC post-write subgraphs
// linking this trailing membar back to a card mark membar
2453 MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454
2455 if (!card_mark_membar) {
2456 return NULL;
2457 }
2458
// finally the card mark membar must itself be fed by a leading membar
2459 return normal_to_leading(card_mark_membar);
2460 }
2461
2462 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466 assert(barrier->is_MemBar(), "expecting a membar");
2467
2468 if (UseBarriersForVolatile) {
2469 // we need to plant a dmb
2470 return false;
2471 }
2472
2473 // a volatile read derived from bytecode (or also from an inlined
2474 // SHA field read via LibraryCallKit::load_field_from_object)
2475 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2476 // with a bogus read dependency on it's preceding load. so in those
2477 // cases we will find the load node at the PARMS offset of the
2478 // acquire membar. n.b. there may be an intervening DecodeN node.
2479 //
2480 // a volatile load derived from an inlined unsafe field access
2481 // manifests as a cpuorder membar with Ctl and Mem projections
2482 // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483 // acquire then feeds another cpuorder membar via Ctl and Mem
2658 n->Opcode() == Op_MemBarRelease),
2659 "expecting a release membar");
2660
2661 if (UseBarriersForVolatile) {
2662 // we need to plant a dmb
2663 return false;
2664 }
2665
2666 // if there is a dependent CPUOrder barrier then use that as the
2667 // leading
2668
2669 MemBarNode *barrier = n->as_MemBar();
2670 // check for an intervening cpuorder membar
2671 MemBarNode *b = child_membar(barrier);
2672 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673 // ok, so start the check from the dependent cpuorder barrier
2674 barrier = b;
2675 }
2676
2677 // must start with a normal feed
2678 MemBarNode *child_barrier = leading_to_normal(barrier);
2679
2680 if (!child_barrier) {
2681 return false;
2682 }
2683
2684 if (!is_card_mark_membar(child_barrier)) {
2685 // this is the trailing membar and we are done
2686 return true;
2687 }
2688
2689 // must be sure this card mark feeds a trailing membar
2690 MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691 return (trailing != NULL);
2692 }
2693
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696 // assert n->is_MemBar();
// when barriers are mandated we always plant the dmb
2697 if (UseBarriersForVolatile) {
2698 // we need to plant a dmb
2699 return false;
2700 }
2701
2702 MemBarNode *mbvol = n->as_MemBar();
2703
2704 // first we check if this is part of a card mark. if so then we have
2705 // to generate a StoreLoad barrier
2706
2707 if (is_card_mark_membar(mbvol)) {
2708 return false;
2709 }
2710
2711 // ok, if it's not a card mark then we still need to check if it is
2712 // a trailing membar of a volatile put graph.
2713
// the dmb can be omitted only when this membar trails a volatile put
// subgraph headed by a leading membar
2714 return (trailing_to_leading(mbvol) != NULL);
2715 }
2716
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718
2719 bool needs_releasing_store(const Node *n)
2720 {
2721 // assert n->is_Store();
2722 if (UseBarriersForVolatile) {
2723 // we use a normal store and dmb combination
2724 return false;
2725 }
2726
2727 StoreNode *st = n->as_Store();
2728
2729 // the store must be marked as releasing
2730 if (!st->is_release()) {
2731 return false;
2732 }
// NOTE(review): the listing jumps from 2732 to 2742 here -- the
// statements locating the store's memory feed (the ones that assign
// x before it is used below) appear to be missing from this copy;
// verify against the original source file
2742 ProjNode *proj = x->as_Proj();
2743
2744 x = proj->lookup(0);
2745
2746 if (!x || !x->is_MemBar()) {
2747 return false;
2748 }
2749
2750 MemBarNode *barrier = x->as_MemBar();
2751
2752 // if the barrier is a release membar or a cpuorder membar fed by a
2753 // release membar then we need to check whether that forms part of a
2754 // volatile put graph.
2755
2756 // reject invalid candidates
2757 if (!leading_membar(barrier)) {
2758 return false;
2759 }
2760
2761 // does this lead a normal subgraph?
2762 MemBarNode *mbvol = leading_to_normal(barrier);
2763
2764 if (!mbvol) {
2765 return false;
2766 }
2767
2768 // all done unless this is a card mark
2769 if (!is_card_mark_membar(mbvol)) {
2770 return true;
2771 }
2772
2773 // we found a card mark -- just make sure we have a trailing barrier
2774
2775 return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781
2782 bool needs_acquiring_load_exclusive(const Node *n)
2783 {
2784 assert(is_CAS(n->Opcode()), "expecting a compare and swap");
// with mandated barriers the CAS is implemented with explicit dmbs
// so no acquiring load is required
2785 if (UseBarriersForVolatile) {
2786 return false;
2787 }
2788
2789 // CAS nodes only ought to turn up in inlined unsafe CAS operations
2790 #ifdef ASSERT
2791 LoadStoreNode *st = n->as_LoadStore();
2792
2793 // the store must be fed by a membar
2794
2795 Node *x = st->lookup(StoreNode::Memory);
2797 assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2798
2799 ProjNode *proj = x->as_Proj();
2800
2801 x = proj->lookup(0);
2802
2803 assert (x && x->is_MemBar(), "CAS not fed by membar!");
2804
2805 MemBarNode *barrier = x->as_MemBar();
2806
2807 // the barrier must be a cpuorder membar fed by a release membar
2808
2809 assert(barrier->Opcode() == Op_MemBarCPUOrder,
2810 "CAS not fed by cpuorder membar!");
2811
2812 MemBarNode *b = parent_membar(barrier);
2813 assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2814 "CAS not fed by cpuorder+release membar pair!");
2815
2816 // does this lead a normal subgraph?
2817 MemBarNode *mbar = leading_to_normal(barrier);
2818
2819 assert(mbar != NULL, "CAS not embedded in normal graph!");
2820
2821 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2822 #endif // ASSERT
2823 // so we can just return true here
// debug builds validate the expected CAS subgraph shape via the
// asserts above; product builds simply answer true
2824 return true;
2825 }
2826
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if the dmb ishst normally emitted before the card
2830 // write (StoreCM) can be omitted, otherwise false
2831
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM")
2835
2836 // we only ever need to generate a dmb ishst between an object put
2837 // and the associated card mark when we are using CMS without
2838 // conditional card marking
2839
2840 if (!UseConcMarkSweepGC || UseCondCardMark) {
2841 return true;
2842 }
2843
2844 // if we are implementing volatile puts using barriers then the
2845 // object put is implemented as an str so we must insert the dmb ishst
2846
2847 if (UseBarriersForVolatile) {
2848 return false;
2849 }
2850
2851 // we can omit the dmb ishst if this StoreCM is part of a volatile
2852 // put because in that case the put will be implemented by stlr
2853 //
2854 // we need to check for a normal subgraph feeding this StoreCM.
2855 // that means the StoreCM must be fed Memory from a leading membar,
2856 // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857 // leading membar must be part of a normal subgraph
2858
2859 Node *x = storecm->in(StoreNode::Memory);
2860
2861 if (!x->is_Proj()) {
2862 return false;
2863 }
2864
// step from the memory Proj to the membar that produced it
2865 x = x->in(0);
2866
2867 if (!x->is_MemBar()) {
2868 return false;
2869 }
2870
2871 MemBarNode *leading = x->as_MemBar();
2872
2873 // reject invalid candidates
2874 if (!leading_membar(leading)) {
2875 return false;
2876 }
2877
2878 // we can omit the StoreStore if it is the head of a normal subgraph
2879 return (leading_to_normal(leading) != NULL);
2880 }
2881
2882
2883 #define __ _masm.
2884
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892
2893 //=============================================================================
2894
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 // from the start of the call to the point where the return address
2897 // will point.
2898
2899 int MachCallStaticJavaNode::ret_addr_offset()
|
1024
1025 static uint size_exception_handler() {
1026 return MacroAssembler::far_branch_size();
1027 }
1028
1029 static uint size_deopt_handler() {
1030 // count one adr and one far branch instruction
1031 return 4 * NativeInstruction::instruction_size;
1032 }
1033 };
1034
1035 // graph traversal helpers
1036
1037 MemBarNode *parent_membar(const Node *n);
1038 MemBarNode *child_membar(const MemBarNode *n);
1039 bool leading_membar(const MemBarNode *barrier);
1040
1041 bool is_card_mark_membar(const MemBarNode *barrier);
1042 bool is_CAS(int opcode);
1043
1044 MemBarNode *leading_to_trailing(MemBarNode *leading);
1045 MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
1046 MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1047
1048 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1049
1050 bool unnecessary_acquire(const Node *barrier);
1051 bool needs_acquiring_load(const Node *load);
1052
1053 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1054
1055 bool unnecessary_release(const Node *barrier);
1056 bool unnecessary_volatile(const Node *barrier);
1057 bool needs_releasing_store(const Node *store);
1058
1059 // predicate controlling translation of CompareAndSwapX
1060 bool needs_acquiring_load_exclusive(const Node *load);
1061
1062 // predicate controlling translation of StoreCM
1063 bool unnecessary_storestore(const Node *storecm);
1064 %}
1065
1403
1404 return false;
1405 }
1406
1407
1408 // 3) helper predicates to traverse volatile put or CAS graphs which
1409 // may contain GC barrier subgraphs
1410
1411 // Preamble
1412 // --------
1413 //
1414 // for volatile writes we can omit generating barriers and employ a
1415 // releasing store when we see a node sequence with a
1416 // leading MemBarRelease and a trailing MemBarVolatile as follows
1417 //
1418 // MemBarRelease
1419 // { || } -- optional
1420 // {MemBarCPUOrder}
1421 // || \\
1422 // || StoreX[mo_release]
1423 // | \ Bot / ???
1424 // | MergeMem
1425 // | /
1426 // MemBarVolatile
1427 //
1428 // where
1429 // || and \\ represent Ctl and Mem feeds via Proj nodes
1430 // | \ and / indicate further routing of the Ctl and Mem feeds
1431 //
1432 // Note that the memory feed from the CPUOrder membar to the
1433 // MergeMem node is an AliasIdxBot slice while the feed from the
1434 // StoreX is for a slice determined by the type of value being
1435 // written.
1436 //
1437 // the diagram above shows the graph we see for non-object stores.
1438 // for a volatile Object store (StoreN/P) we may see other nodes
1439 // below the leading membar because of the need for a GC pre- or
1440 // post-write barrier.
1441 //
1442 // with most GC configurations we will see this simple variant which
1443 // includes a post-write barrier card mark.
1444 //
1445 // MemBarRelease______________________________
1446 // || \\ Ctl \ \\
1447 // || StoreN/P[mo_release] CastP2X StoreB/CM
1448 // | \ Bot / oop . . . /
1449 // | MergeMem
1450 // | /
1451 // || /
1452 // MemBarVolatile
1453 //
1454 // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1455 // the object address to an int used to compute the card offset) and
1456 // Ctl+Mem to a StoreB node (which does the actual card mark).
1457 //
1458 // n.b. a StoreCM node is only ever used when CMS (with or without
1459 // CondCardMark) or G1 is configured. This abstract instruction
1460 // differs from a normal card mark write (StoreB) because it implies
1461 // a requirement to order visibility of the card mark (StoreCM)
1462 // after that of the object put (StoreP/N) using a StoreStore memory
1463 // barrier. Note that this is /not/ a requirement to order the
1464 // instructions in the generated code (that is already guaranteed by
1465 // the order of memory dependencies). Rather it is a requirement to
1466 // ensure visibility order which only applies on architectures like
1467 // AArch64 which do not implement TSO. This ordering is required for
1468 // both non-volatile and volatile puts.
1469 //
1470 // That implies that we need to translate a StoreCM using the
1471 // sequence
1472 //
1473 // dmb ishst
1474 // stlrb
1475 //
1476 // This dmb cannot be omitted even when the associated StoreX or
1477 // CompareAndSwapX is implemented using stlr. However, as described
1478 // below there are circumstances where a specific GC configuration
1479 // requires a stronger barrier in which case it can be omitted.
1480 //
1481 // With the Serial or Parallel GC using +CondCardMark the card mark
1482 // is performed conditionally on it currently being unmarked in
1483 // which case the volatile put graph looks slightly different
1484 //
1485 // MemBarRelease____________________________________________
1486 // || \\ Ctl \ Ctl \ \\ Mem \
1487 // || StoreN/P[mo_release] CastP2X If LoadB |
1488 // | \ Bot / oop \ |
1489 // | MergeMem . . . StoreB
1490 // | / /
1491 // || /
1492 // MemBarVolatile
1493 //
1494 // It is worth noting at this stage that all the above
1495 // configurations can be uniquely identified by checking that the
1496 // memory flow includes the following subgraph:
1497 //
1498 // MemBarRelease
1499 // {MemBarCPUOrder}
1500 // | \ . . .
1501 // | StoreX[mo_release] . . .
1502 // Bot | / oop
1503 // MergeMem
1504 // |
1505 // MemBarVolatile
1506 //
1507 // This is referred to as a *normal* volatile store subgraph. It can
1508 // easily be detected starting from any candidate MemBarRelease,
1509 // StoreX[mo_release] or MemBarVolatile node.
1510 //
1511 // A small variation on this normal case occurs for an unsafe CAS
1512 // operation. The basic memory flow subgraph for a non-object CAS is
1513 // as follows
1514 //
1515 // MemBarRelease
1516 // ||
1517 // MemBarCPUOrder
1518 // | \\ . . .
1519 // | CompareAndSwapX
1520 // | |
1521 // Bot | SCMemProj
1522 // \ / Bot
1523 // MergeMem
1524 // /
1525 // MemBarCPUOrder
1526 // ||
1527 // MemBarAcquire
1528 //
1529 // The same basic variations on this arrangement (mutatis mutandis)
1530 // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1531 // feeds the extra CastP2X, LoadB etc nodes but the above memory
1532 // flow subgraph is still present.
1533 //
1534 // This is referred to as a *normal* CAS subgraph. It can easily be
1535 // detected starting from any candidate MemBarRelease,
1536 // StoreX[mo_release] or MemBarAcquire node.
1537 //
1538 // The code below uses two helper predicates, leading_to_trailing
1539 // and trailing_to_leading to identify these normal graphs, one
1540 // validating the layout starting from the top membar and searching
1541 // down and the other validating the layout starting from the lower
1542 // membar and searching up.
1543 //
1544 // There are two special case GC configurations when the simple
1545 // normal graphs above may not be generated: when using G1 (which
1546 // always employs a conditional card mark); and when using CMS with
1547 // conditional card marking (+CondCardMark) configured. These GCs
1548 // are both concurrent rather than stop-the world GCs. So they
1549 // introduce extra Ctl+Mem flow into the graph between the leading
1550 // and trailing membar nodes, in particular enforcing stronger
1551 // memory serialisation between the object put and the corresponding
1552 // conditional card mark. CMS employs a post-write GC barrier while
1553 // G1 employs both a pre- and post-write GC barrier.
1554 //
1555 // The post-write barrier subgraph for these configurations includes
1556 // a MemBarVolatile node -- referred to as a card mark membar --
1557 // which is needed to order the card write (StoreCM) operation in
1558 // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1559 // operations performed by GC threads i.e. a card mark membar
1560 // constitutes a StoreLoad barrier hence must be translated to a dmb
1561 // ish (whether or not it sits inside a volatile store sequence).
1562 //
1563 // Of course, the use of the dmb ish for the card mark membar also
1564 // implies that the StoreCM which follows can omit the dmb ishst
1565 // instruction. The necessary visibility ordering will already be
1566 // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
1567 // needs to be generated as part of the StoreCM sequence with GC
1568 // configuration +CMS -CondCardMark.
1569 //
1570 // Of course all these extra barrier nodes may well be absent --
1571 // they are only inserted for object puts. Their potential presence
1572 // significantly complicates the task of identifying whether a
1573 // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1574 // MemBarAcquire forms part of a volatile put or CAS when using
1575 // these GC configurations (see below) and also complicates the
1576 // decision as to how to translate a MemBarVolatile and StoreCM.
1577 //
1578 // So, this means that a card mark MemBarVolatile occurring in the
1579 // post-barrier graph needs to be distinguished from a normal
1580 // trailing MemBarVolatile. Resolving this is straightforward: a
1581 // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1582 // node and that is a unique marker
1583 //
1584 // MemBarVolatile (card mark)
1585 // C | \ . . .
1586 // | StoreCM . . .
1587 // . . .
1588 //
1589 // Returning to the task of translating the object put and the
1590 // leading/trailing membar nodes: what do the node graphs look like
1591 // for these 2 special cases? and how can we determine the status of
1592 // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1593 // normal and non-normal cases?
1594 //
1595 // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1596 // which selects conditional execution based on the value loaded
1597 // (LoadB) from the card. Ctl and Mem are fed to the If via an
1598 // intervening StoreLoad barrier (MemBarVolatile).
1599 //
1600 // So, with CMS we may see a node graph for a volatile object store
1601 // which looks like this
1602 //
1603 // MemBarRelease
1604 // MemBarCPUOrder_(leading)____________________
1605 // C | | M \ \\ M | C \
1606 // | | \ StoreN/P[mo_release] | CastP2X
1607 // | | Bot \ / oop \ |
1608 // | | MergeMem \ /
1609 // | | / | /
1610 // MemBarVolatile (card mark) | /
1611 // C | || M | | /
1612 // | LoadB | Bot oop | / Bot
1613 // | | | / /
1614 // | Cmp |\ / /
1615 // | / | \ / /
1616 // If | \ / /
1617 // | \ | \ / /
1618 // IfFalse IfTrue | \ / /
1619 // \ / \ | | / /
1620 // \ / StoreCM | / /
1621 // \ / \ / / /
1622 // Region Phi / /
1623 // | \ Raw | / /
1624 // | . . . | / /
1625 // | MergeMem
1626 // | |
1627 // MemBarVolatile (trailing)
1628 //
1629 // Notice that there are two MergeMem nodes below the leading
1630 // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1631 // the leading membar and the oopptr Mem slice from the Store into
1632 // the card mark membar. The trailing MergeMem merges the
1633 // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1634 // slice from the StoreCM and an oop slice from the StoreN/P node
1635 // into the trailing membar (n.b. the raw slice proceeds via a Phi
1636 // associated with the If region).
1637 //
1638 // So, in the case of CMS + CondCardMark the volatile object store
1639 // graph still includes a normal volatile store subgraph from the
1640 // leading membar to the trailing membar. However, it also contains
1641 // the same shape memory flow to the card mark membar. The two flows
1642 // can be distinguished by testing whether or not the downstream
1643 // membar is a card mark membar.
1644 //
1645 // The graph for a CAS also varies with CMS + CondCardMark, in
1646 // particular employing a control feed from the CompareAndSwapX node
1647 // through a CmpI and If to the card mark membar and StoreCM which
1648 // updates the associated card. This avoids executing the card mark
1649 // if the CAS fails. However, it can be seen from the diagram below
1650 // that the presence of the barrier does not alter the normal CAS
1651 // memory subgraph where the leading membar feeds a CompareAndSwapX,
1652 // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1653 // MemBarAcquire pair.
1654 //
1655 // MemBarRelease
1656 // MemBarCPUOrder__(leading)_______________________
1657 // C / M | \\ C \
1658 // . . . | Bot CompareAndSwapN/P CastP2X
1659 // | C / M |
1660 // | CmpI |
1661 // | / |
1662 // | . . . |
1663 // | IfTrue |
1664 // | / |
1665 // MemBarVolatile (card mark) |
1666 // C | || M | |
1667 // | LoadB | Bot ______/|
1668 // | | | / |
1669 // | Cmp | / SCMemProj
1670 // | / | / |
1671 // If | / /
1672 // | \ | / / Bot
1673 // IfFalse IfTrue | / /
1674 // | / \ / / prec /
1675 // . . . | / StoreCM /
1676 // \ | / | raw /
1677 // Region . . . /
1678 // | \ /
1679 // | . . . \ / Bot
1680 // | MergeMem
1681 // | /
1682 // MemBarCPUOrder
1683 // MemBarAcquire (trailing)
1684 //
1685 // This has a slightly different memory subgraph to the one seen
1686 // previously but the core of it has a similar memory flow to the
1687 // CAS normal subgraph:
1688 //
1689 // MemBarRelease
1690 // MemBarCPUOrder____
1691 // | \ . . .
1692 // | CompareAndSwapX . . .
1693 // | C / M |
1694 // | CmpI |
1695 // | / |
1696 // | . . /
1697 // Bot | IfTrue /
1698 // | / /
1699 // MemBarVolatile /
1700 // | ... /
1701 // StoreCM ... /
1702 // | /
1703 // . . . SCMemProj
1704 // Raw \ / Bot
1705 // MergeMem
1706 // |
1707 // MemBarCPUOrder
1708 // MemBarAcquire
1709 //
1710 // The G1 graph for a volatile object put is a lot more complicated.
1711 // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1712 // which adds the old value to the SATB queue; the releasing store
1713 // itself; and, finally, a post-write graph which performs a card
1714 // mark.
1715 //
1716 // The pre-write graph may be omitted, but only when the put is
1717 // writing to a newly allocated (young gen) object and then only if
1718 // there is a direct memory chain to the Initialize node for the
1719 // object allocation. This will not happen for a volatile put since
1720 // any memory chain passes through the leading membar.
1721 //
1722 // The pre-write graph includes a series of 3 If tests. The outermost
1723 // If tests whether SATB is enabled (no else case). The next If tests
1724 // whether the old value is non-NULL (no else case). The third tests
1725 // whether the SATB queue index is > 0, if so updating the queue. The
1726 // else case for this third If calls out to the runtime to allocate a
1727 // new queue buffer.
1728 //
1729 // So with G1 the pre-write and releasing store subgraph looks like
1730 // this (the nested Ifs are omitted).
1731 //
1732 // MemBarRelease (leading)____________
1733 // C | || M \ M \ M \ M \ . . .
1734 // | LoadB \ LoadL LoadN \
1735 // | / \ \
1736 // If |\ \
1737 // | \ | \ \
1738 // IfFalse IfTrue | \ \
1739 // | | | \ |
1740 // | If | /\ |
1741 // | | \ |
1742 // | \ |
1743 // | . . . \ |
1744 // | / | / | |
1745 // Region Phi[M] | |
1746 // | \ | | |
1747 // | \_____ | ___ | |
1748 // C | C \ | C \ M | |
1749 // | CastP2X | StoreN/P[mo_release] |
1750 // | | | |
1751 // C | M | M | M |
1752 // \ | Raw | oop / Bot
1753 // . . .
1754 // (post write subtree elided)
1755 // . . .
1756 // C \ M /
1757 // MemBarVolatile (trailing)
1758 //
1759 // Note that the three memory feeds into the post-write tree are an
1760 // AliasRawIdx slice associated with the writes in the pre-write
1761 // tree, an oop type slice from the StoreX specific to the type of
1762 // the volatile field and the AliasBotIdx slice emanating from the
1763 // leading membar.
1764 //
1765 // n.b. the LoadB in this subgraph is not the card read -- it's a
1766 // read of the SATB queue active flag.
1767 //
1768 // The CAS graph is once again a variant of the above with a
1769 // CompareAndSwapX node and SCMemProj in place of the StoreX. The
1770 // value from the CompareAndSwapX node is fed into the post-write
1771 // graph along with the AliasIdxRaw feed from the pre-barrier and
1772 // the AliasIdxBot feeds from the leading membar and the ScMemProj.
1773 //
1774 // MemBarRelease (leading)____________
1775 // C | || M \ M \ M \ M \ . . .
1776 // | LoadB \ LoadL LoadN \
1777 // | / \ \
1778 // If |\ \
1779 // | \ | \ \
1780 // IfFalse IfTrue | \ \
1781 // | | | \ \
1782 // | If | \ |
1783 // | | \ |
1784 // | \ |
1785 // | . . . \ |
1786 // | / | / \ |
1787 // Region Phi[M] \ |
1788 // | \ | \ |
1789 // | \_____ | | |
1790 // C | C \ | | |
1791 // | CastP2X | CompareAndSwapX |
1792 // | | res | | |
1793 // C | M | | SCMemProj M |
1794 // \ | Raw | | Bot / Bot
1795 // . . .
1796 // (post write subtree elided)
1797 // . . .
1798 // C \ M /
1799 // MemBarVolatile (trailing)
1800 //
1801 // The G1 post-write subtree is also optional, this time when the
1802 // new value being written is either null or can be identified as a
1803 // newly allocated (young gen) object with no intervening control
1804 // flow. The latter cannot happen but the former may, in which case
1805 // the card mark membar is omitted and the memory feeds from the
1806 // leading membar and the StoreN/P are merged directly into the
1807 // trailing membar as per the normal subgraph. So, the only special
1808 // case which arises is when the post-write subgraph is generated.
1809 //
1810 // The kernel of the post-write G1 subgraph is the card mark itself
1811 // which includes a card mark memory barrier (MemBarVolatile), a
1812 // card test (LoadB), and a conditional update (If feeding a
1813 // StoreCM). These nodes are surrounded by a series of nested Ifs
1814 // which try to avoid doing the card mark. The top level If skips if
1815 // the object reference does not cross regions (i.e. it tests if
1816 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1817 // need not be recorded. The next If, which skips on a NULL value,
1818 // may be absent (it is not generated if the type of value is >=
1819 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1820 // checking if card_val != young). n.b. although this test requires
1821 // a pre-read of the card it can safely be done before the StoreLoad
1822 // barrier. However that does not bypass the need to reread the card
1823 // after the barrier.
1824 //
1825 // (pre-write subtree elided)
1826 // . . . . . . . . . . . .
1827 // C | M | M | M |
1828 // Region Phi[M] StoreN |
1829 // | Raw | oop | Bot |
1830 // / \_______ |\ |\ |\
1831 // C / C \ . . . | \ | \ | \
1832 // If CastP2X . . . | \ | \ | \
1833 // / \ | \ | \ | \
1834 // / \ | \ | \ | \
1835 // IfFalse IfTrue | | | \
1836 // | | \ | / |
1837 // | If \ | \ / \ |
1838 // | / \ \ | / \ |
1839 // | / \ \ | / \ | |
1840 // | IfFalse IfTrue MergeMem \ | |
1841 // | . . . / \ | \ | |
1842 // | / \ | | | |
1843 // | IfFalse IfTrue | | | |
1844 // | . . . | | | | |
1845 // | If / | | |
1846 // | / \ / | | |
1847 // | / \ / | | |
1848 // | IfFalse IfTrue / | | |
1849 // | . . . | / | | |
1850 // | \ / | | |
1851 // | \ / | | |
1852 // | MemBarVolatile__(card mark ) | | |
1853 // | || C | \ | | |
1854 // | LoadB If | / | |
1855 // | / \ Raw | / / /
1856 // | . . . | / / /
1857 // | \ | / / /
1858 // | StoreCM / / /
1859 // | | / / /
1860 // | . . . / /
1861 // | / /
1862 // | . . . / /
1863 // | | | / / /
1864 // | | Phi[M] / / /
1865 // | | | / / /
1866 // | | | / / /
1867 // | Region . . . Phi[M] / /
1868 // | | | / /
1869 // \ | | / /
1870 // \ | . . . | / /
1871 // \ | | / /
1872 // Region Phi[M] / /
1873 // | \ / /
1874 // \ MergeMem
1875 // \ /
1876 // MemBarVolatile
1877 //
1878 // As with CMS + CondCardMark the first MergeMem merges the
1879 // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1880 // slice from the Store into the card mark membar. However, in this
1881 // case it may also merge an AliasRawIdx mem slice from the pre
1882 // barrier write.
1883 //
1884 // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1885 // leading membar with an oop slice from the StoreN and an
1886 // AliasRawIdx slice from the post barrier writes. In this case the
1887 // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1888 // which combine feeds from the If regions in the post barrier
1889 // subgraph.
1890 //
1891 // So, for G1 the same characteristic subgraph arises as for CMS +
1892 // CondCardMark. There is a normal subgraph feeding the card mark
1893 // membar and a normal subgraph feeding the trailing membar.
1894 //
1895 // The CAS graph when using G1GC also includes an optional
1896 // post-write subgraph. It is very similar to the above graph except
1897 // for a few details.
1898 //
1899 // - The control flow is gated by an additional If which tests the
1900 // result from the CompareAndSwapX node
1901 //
1902 // - The MergeMem which feeds the card mark membar only merges the
1903 // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1904 // slice from the pre-barrier. It does not merge the SCMemProj
1905 // AliasIdxBot slice. So, this subgraph does not look like the
1906 // normal CAS subgraph.
1907 //
1908 // - The MergeMem which feeds the trailing membar merges the
1909 // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1910 // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1911 // has two AliasIdxBot input slices. However, this subgraph does
1912 // still look like the normal CAS subgraph.
1913 //
1914 // So, the upshot is:
1915 //
// In all cases a volatile put graph will include a *normal*
// volatile store subgraph between the leading membar and the
// trailing membar. It may also include a normal volatile store
// subgraph between the leading membar and the card mark membar.
1920 //
1921 // In all cases a CAS graph will contain a unique normal CAS graph
1922 // feeding the trailing membar.
1923 //
1924 // In all cases where there is a card mark membar (either as part of
1925 // a volatile object put or CAS) it will be fed by a MergeMem whose
1926 // AliasIdxBot slice feed will be a leading membar.
1927 //
1928 // The predicates controlling generation of instructions for store
1929 // and barrier nodes employ a few simple helper functions (described
1930 // below) which identify the presence or absence of all these
1931 // subgraph configurations and provide a means of traversing from
1932 // one node in the subgraph to another.
1933
1934 // is_CAS(int opcode)
1935 //
1936 // return true if opcode is one of the possible CompareAndSwapX
1937 // values otherwise false.
1938
1939 bool is_CAS(int opcode)
1940 {
1941 return (opcode == Op_CompareAndSwapI ||
1942 opcode == Op_CompareAndSwapL ||
1943 opcode == Op_CompareAndSwapN ||
1944 opcode == Op_CompareAndSwapP);
1945 }
1946
1947 // leading_to_trailing
1948 //
// graph traversal helper which detects the normal case Mem feed from
1950 // a release membar (or, optionally, its cpuorder child) to a
1951 // dependent volatile membar i.e. it ensures that one or other of
1952 // the following Mem flow subgraph is present.
1953 //
1954 // MemBarRelease {leading}
1955 // {MemBarCPUOrder} {optional}
1956 // Bot | \ . . .
1957 // | StoreN/P[mo_release] . . .
1958 // | /
1959 // MergeMem
1960 // |
1961 // MemBarVolatile {not card mark}
1962 //
1963 // MemBarRelease {leading}
1964 // {MemBarCPUOrder} {optional}
1965 // | \ . . .
1966 // | CompareAndSwapX . . .
1967 // |
1968 // . . . SCMemProj
1969 // \ |
1970 // | MergeMem
1971 // | /
1972 // MemBarCPUOrder
1973 // MemBarAcquire {trailing}
1974 //
1975 // the predicate needs to be capable of distinguishing the following
// volatile put graph which may arise when a GC post barrier
1977 // inserts a card mark membar
1978 //
1979 // MemBarRelease {leading}
1980 // {MemBarCPUOrder}__
1981 // Bot | \ \
1982 // | StoreN/P \
1983 // | / \ |
1984 // MergeMem \ |
1985 // | \ |
1986 // MemBarVolatile \ |
1987 // {card mark} \ |
1988 // MergeMem
1989 // |
1990 // {not card mark} MemBarVolatile
1991 //
1992 // if the correct configuration is present returns the trailing
1993 // membar otherwise NULL.
1994 //
1995 // the input membar is expected to be either a cpuorder membar or a
1996 // release membar. in the latter case it should not have a cpu membar
1997 // child.
1998 //
1999 // the returned value may be a card mark or trailing membar
2000 //
2001
MemBarNode *leading_to_trailing(MemBarNode *leading)
{
  assert((leading->Opcode() == Op_MemBarRelease ||
          leading->Opcode() == Op_MemBarCPUOrder),
         "expecting a volatile or cpuroder membar!");

  // check the mem flow
  ProjNode *mem = leading->proj_out(TypeFunc::Memory);

  if (!mem) {
    return NULL;
  }

  Node *x = NULL;
  StoreNode * st = NULL;
  LoadStoreNode *cas = NULL;
  MergeMemNode *mm = NULL;
  MergeMemNode *mm2 = NULL;

  // scan the Mem projection's users collecting at most one releasing
  // store or CAS plus up to two MergeMems -- a second MergeMem arises
  // when a GC card mark membar has been interposed (see preamble)
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x->is_MergeMem()) {
      if (mm != NULL) {
        if (mm2 != NULL) {
          // should not see more than 2 merge mems
          return NULL;
        } else {
          mm2 = x->as_MergeMem();
        }
      } else {
        mm = x->as_MergeMem();
      }
    } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two releasing stores/CAS nodes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // must have a store or a cas
  if (!st && !cas) {
    return NULL;
  }

  // must have at least one merge if we also have st
  if (st && !mm) {
    return NULL;
  }

  if (cas) {
    // CAS flavour: follow CompareAndSwapX --> SCMemProj --> MergeMem
    // --> MemBarCPUOrder --> MemBarAcquire {trailing}
    Node *y = NULL;
    // look for an SCMemProj
    // n.b. this accepts the first Proj output found -- assumed to be
    // the SCMemProj
    for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
      x = cas->fast_out(i);
      if (x->is_Proj()) {
        y = x;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    // the proj must feed a MergeMem
    // n.b. this deliberately overwrites any merge recorded in the
    // scan above -- for the CAS case the merge that matters is the
    // one fed by the SCMemProj
    for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
      x = y->fast_out(i);
      if (x->is_MergeMem()) {
        mm = x->as_MergeMem();
        break;
      }
    }
    if (mm == NULL) {
      return NULL;
    }
    MemBarNode *mbar = NULL;
    // ensure the merge feeds a trailing membar cpuorder + acquire pair
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarCPUOrder) {
          MemBarNode *z = x->as_MemBar();
          z = child_membar(z);
          if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
            mbar = z;
          }
        }
        // only the first membar user is considered
        break;
      }
    }
    return mbar;
  } else {
    // store flavour: the store must feed every merge we found and
    // each merge must feed a MemBarVolatile
    Node *y = NULL;
    // ensure the store feeds the first mergemem;
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm) {
        y = st;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    if (mm2 != NULL) {
      // ensure the store feeds the second mergemem;
      // n.b. no break here -- harmless, since we only record st and
      // scanning the remaining outputs cannot undo that
      y = NULL;
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm2) {
          y = st;
        }
      }
      if (y == NULL) {
        return NULL;
      }
    }

    MemBarNode *mbar = NULL;
    // ensure the first mergemem feeds a volatile membar
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile) {
          mbar = x->as_MemBar();
        }
        break;
      }
    }
    if (mm2 == NULL) {
      // this is our only option for a trailing membar
      return mbar;
    }
    // ensure the second mergemem feeds a volatile membar
    MemBarNode *mbar2 = NULL;
    for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
      x = mm2->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile) {
          mbar2 = x->as_MemBar();
        }
        break;
      }
    }
    // if we have two merge mems we must have two volatile membars
    if (mbar == NULL || mbar2 == NULL) {
      return NULL;
    }
    // return the trailing membar -- i.e. whichever of the two
    // volatile membars is NOT the card mark membar
    if (is_card_mark_membar(mbar2)) {
      return mbar;
    } else {
      if (is_card_mark_membar(mbar)) {
        return mbar2;
      } else {
        return NULL;
      }
    }
  }
}
2168
2169 // trailing_to_leading
2170 //
2171 // graph traversal helper which detects the normal case Mem feed
2172 // from a trailing membar to a preceding release membar (optionally
2173 // its cpuorder child) i.e. it ensures that one or other of the
2174 // following Mem flow subgraphs is present.
2175 //
2176 // MemBarRelease {leading}
2177 // MemBarCPUOrder {optional}
2178 // | Bot | \ . . .
2179 // | | StoreN/P[mo_release] . . .
2180 // | | /
2181 // | MergeMem
2182 // | |
2183 // MemBarVolatile {not card mark}
2184 //
2185 // MemBarRelease {leading}
2186 // MemBarCPUOrder {optional}
2187 // | \ . . .
2188 // | CompareAndSwapX . . .
2189 // |
2190 // . . . SCMemProj
2191 // \ |
2192 // | MergeMem
2193 // | |
2194 // MemBarCPUOrder
2195 // MemBarAcquire {trailing}
2196 //
2197 // this predicate checks for the same flow as the previous predicate
2198 // but starting from the bottom rather than the top.
2199 //
2200 // if the configuration is present returns the cpuorder member for
2201 // preference or when absent the release membar otherwise NULL.
2202 //
2203 // n.b. the input membar is expected to be a MemBarVolatile or
2204 // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2205 // mark membar.
2206
2207 MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2208 {
2209 // input must be a volatile membar
2210 assert((barrier->Opcode() == Op_MemBarVolatile ||
2211 barrier->Opcode() == Op_MemBarAcquire),
2212 "expecting a volatile or an acquire membar");
2213
2214 assert((barrier->Opcode() != Op_MemBarVolatile) ||
2215 !is_card_mark_membar(barrier),
2216 "not expecting a card mark membar");
2217 Node *x;
2218 bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2219
2220 // if we have an acquire membar then it must be fed via a CPUOrder
2221 // membar
2222
2223 if (is_cas) {
2224 // skip to parent barrier which must be a cpuorder
2225 x = parent_membar(barrier);
2226 if (x->Opcode() != Op_MemBarCPUOrder)
2227 return NULL;
2228 } else {
2229 // start from the supplied barrier
2230 x = (Node *)barrier;
2231 }
2232
2233 // the Mem feed to the membar should be a merge
2234 x = x ->in(TypeFunc::Memory);
2235 if (!x->is_MergeMem())
2236 return NULL;
2309 if (st == NULL) {
2310 // nothing more to check
2311 return leading;
2312 } else {
2313 // we should not have a store if we started from an acquire
2314 if (is_cas) {
2315 return NULL;
2316 }
2317
2318 // the store should feed the merge we used to get here
2319 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2320 if (st->fast_out(i) == mm) {
2321 return leading;
2322 }
2323 }
2324 }
2325
2326 return NULL;
2327 }
2328
2329 // card_mark_to_leading
2330 //
2331 // graph traversal helper which traverses from a card mark volatile
2332 // membar to a leading membar i.e. it ensures that the following Mem
2333 // flow subgraph is present.
2334 //
2335 // MemBarRelease {leading}
2336 // {MemBarCPUOrder} {optional}
2337 // | . . .
2338 // Bot | /
2339 // MergeMem
2340 // |
2341 // MemBarVolatile (card mark)
2342 // | \
2343 // . . . StoreCM
2344 //
2345 // if the configuration is present returns the cpuorder member for
2346 // preference or when absent the release membar otherwise NULL.
2347 //
// n.b. the input membar is expected to be a MemBarVolatile and must
2349 // be a card mark membar.
2350
2351 MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2352 {
2353 // input must be a card mark volatile membar
2354 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2355
2356 // the Mem feed to the membar should be a merge
2357 Node *x = barrier->in(TypeFunc::Memory);
2358 if (!x->is_MergeMem()) {
2359 return NULL;
2360 }
2361
2362 MergeMemNode *mm = x->as_MergeMem();
2363
2364 x = mm->in(Compile::AliasIdxBot);
2365
2366 if (!x->is_MemBar()) {
2367 return NULL;
2368 }
2369
2370 MemBarNode *leading = x->as_MemBar();
2371
2372 if (leading_membar(leading)) {
2373 return leading;
2374 }
2375
2376 return NULL;
2377 }
2378
2379 bool unnecessary_acquire(const Node *barrier)
2380 {
2381 assert(barrier->is_MemBar(), "expecting a membar");
2382
2383 if (UseBarriersForVolatile) {
2384 // we need to plant a dmb
2385 return false;
2386 }
2387
2388 // a volatile read derived from bytecode (or also from an inlined
2389 // SHA field read via LibraryCallKit::load_field_from_object)
2390 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2391 // with a bogus read dependency on it's preceding load. so in those
2392 // cases we will find the load node at the PARMS offset of the
2393 // acquire membar. n.b. there may be an intervening DecodeN node.
2394 //
2395 // a volatile load derived from an inlined unsafe field access
2396 // manifests as a cpuorder membar with Ctl and Mem projections
2397 // feeding both an acquire membar and a LoadX[mo_acquire]. The
2398 // acquire then feeds another cpuorder membar via Ctl and Mem
2573 n->Opcode() == Op_MemBarRelease),
2574 "expecting a release membar");
2575
2576 if (UseBarriersForVolatile) {
2577 // we need to plant a dmb
2578 return false;
2579 }
2580
2581 // if there is a dependent CPUOrder barrier then use that as the
2582 // leading
2583
2584 MemBarNode *barrier = n->as_MemBar();
2585 // check for an intervening cpuorder membar
2586 MemBarNode *b = child_membar(barrier);
2587 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2588 // ok, so start the check from the dependent cpuorder barrier
2589 barrier = b;
2590 }
2591
2592 // must start with a normal feed
2593 MemBarNode *trailing = leading_to_trailing(barrier);
2594
2595 return (trailing != NULL);
2596 }
2597
2598 bool unnecessary_volatile(const Node *n)
2599 {
2600 // assert n->is_MemBar();
2601 if (UseBarriersForVolatile) {
2602 // we need to plant a dmb
2603 return false;
2604 }
2605
2606 MemBarNode *mbvol = n->as_MemBar();
2607
2608 // first we check if this is part of a card mark. if so then we have
2609 // to generate a StoreLoad barrier
2610
2611 if (is_card_mark_membar(mbvol)) {
2612 return false;
2613 }
2614
2615 // ok, if it's not a card mark then we still need to check if it is
2616 // a trailing membar of a volatile put graph.
2617
2618 return (trailing_to_leading(mbvol) != NULL);
2619 }
2620
2621 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2622
2623 bool needs_releasing_store(const Node *n)
2624 {
2625 // assert n->is_Store();
2626 if (UseBarriersForVolatile) {
2627 // we use a normal store and dmb combination
2628 return false;
2629 }
2630
2631 StoreNode *st = n->as_Store();
2632
2633 // the store must be marked as releasing
2634 if (!st->is_release()) {
2635 return false;
2636 }
2646 ProjNode *proj = x->as_Proj();
2647
2648 x = proj->lookup(0);
2649
2650 if (!x || !x->is_MemBar()) {
2651 return false;
2652 }
2653
2654 MemBarNode *barrier = x->as_MemBar();
2655
2656 // if the barrier is a release membar or a cpuorder mmebar fed by a
2657 // release membar then we need to check whether that forms part of a
2658 // volatile put graph.
2659
2660 // reject invalid candidates
2661 if (!leading_membar(barrier)) {
2662 return false;
2663 }
2664
2665 // does this lead a normal subgraph?
2666 MemBarNode *trailing = leading_to_trailing(barrier);
2667
2668 return (trailing != NULL);
2669 }
2670
2671 // predicate controlling translation of CAS
2672 //
2673 // returns true if CAS needs to use an acquiring load otherwise false
2674
2675 bool needs_acquiring_load_exclusive(const Node *n)
2676 {
2677 assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2678 if (UseBarriersForVolatile) {
2679 return false;
2680 }
2681
2682 // CAS nodes only ought to turn up in inlined unsafe CAS operations
2683 #ifdef ASSERT
2684 LoadStoreNode *st = n->as_LoadStore();
2685
2686 // the store must be fed by a membar
2687
2688 Node *x = st->lookup(StoreNode::Memory);
2690 assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2691
2692 ProjNode *proj = x->as_Proj();
2693
2694 x = proj->lookup(0);
2695
2696 assert (x && x->is_MemBar(), "CAS not fed by membar!");
2697
2698 MemBarNode *barrier = x->as_MemBar();
2699
2700 // the barrier must be a cpuorder mmebar fed by a release membar
2701
2702 assert(barrier->Opcode() == Op_MemBarCPUOrder,
2703 "CAS not fed by cpuorder membar!");
2704
2705 MemBarNode *b = parent_membar(barrier);
2706 assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2707 "CAS not fed by cpuorder+release membar pair!");
2708
2709 // does this lead a normal subgraph?
2710 MemBarNode *mbar = leading_to_trailing(barrier);
2711
2712 assert(mbar != NULL, "CAS not embedded in normal graph!");
2713
2714 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2715 #endif // ASSERT
2716 // so we can just return true here
2717 return true;
2718 }
2719
2720 // predicate controlling translation of StoreCM
2721 //
// returns true if the StoreStore barrier preceding the card write is
// unnecessary, otherwise false
2724
2725 bool unnecessary_storestore(const Node *storecm)
2726 {
2727 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2728
2729 // we only ever need to generate a dmb ishst between an object put
2730 // and the associated card mark when we are using CMS without
2731 // conditional card marking. Any other occurence will happen when
2732 // performing a card mark using CMS with conditional card marking or
2733 // G1. In those cases the preceding MamBarVolatile will be
2734 // translated to a dmb ish which guarantes visibility of the
2735 // preceding StoreN/P before this StoreCM
2736
2737 if (!UseConcMarkSweepGC || UseCondCardMark) {
2738 return true;
2739 }
2740
2741 // if we are implementing volatile puts using barriers then we must
2742 // insert the dmb ishst
2743
2744 if (UseBarriersForVolatile) {
2745 return false;
2746 }
2747
2748 // we must be using CMS with conditional card marking so we ahve to
2749 // generate the StoreStore
2750
2751 return false;
2752 }
2753
2754
2755 #define __ _masm.
2756
// forward declarations for helper functions to convert register
// indices to register objects
2759
2760 // the ad file has to provide implementations of certain methods
2761 // expected by the generic code
2762 //
2763 // REQUIRED FUNCTIONALITY
2764
2765 //=============================================================================
2766
2767 // !!!!! Special hack to get all types of calls to specify the byte offset
2768 // from the start of the call to the point where the return address
2769 // will point.
2770
2771 int MachCallStaticJavaNode::ret_addr_offset()
|