
src/cpu/aarch64/vm/aarch64.ad

rev 8967 : 8080293: AARCH64: Remove unnecessary dmbs from generated CAS code
Summary: The current encoding for CAS generates unnecessary leading and trailing dmbs for the MemBarAcquire and MemBarRelease which ought to be elided
Reviewed-by: kvn


1022   static int emit_exception_handler(CodeBuffer &cbuf);
1023   static int emit_deopt_handler(CodeBuffer& cbuf);
1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036 
1037   MemBarNode *parent_membar(const Node *n);
1038   MemBarNode *child_membar(const MemBarNode *n);
1039   bool leading_membar(const MemBarNode *barrier);
1040 
1041   bool is_card_mark_membar(const MemBarNode *barrier);

1042 
1043   MemBarNode *leading_to_normal(MemBarNode *leading);
1044   MemBarNode *normal_to_leading(const MemBarNode *barrier);
1045   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1046   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1047   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1048 
1049   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1050 
1051   bool unnecessary_acquire(const Node *barrier);
1052   bool needs_acquiring_load(const Node *load);
1053 
1054   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1055 
1056   bool unnecessary_release(const Node *barrier);
1057   bool unnecessary_volatile(const Node *barrier);
1058   bool needs_releasing_store(const Node *store);
1059 
1060   // predicate controlling translation of StoreCM
1061   bool unnecessary_storestore(const Node *storecm);
1062 %}
1063 
1064 source %{
1065 
1066   // Optimization of volatile gets and puts
1067   // -------------------------------------
1068   //
1069   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1070   // use to implement volatile reads and writes. For a volatile read
1071   // we simply need
1072   //
1073   //   ldar<x>
1074   //
1075   // and for a volatile write we need
1076   //
1077   //   stlr<x>
1078   // 
1079   // Alternatively, we can implement them by pairing a normal
1080   // load/store with a memory barrier. For a volatile read we need
1081   // 
1082   //   ldr<x>
1083   //   dmb ishld
1084   //
1085   // for a volatile write
1086   //
1087   //   dmb ish
1088   //   str<x>
1089   //   dmb ish
1090   //
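Concretely, the choice between the two encodings is made when the instructions are emitted. The following is a minimal sketch only, assuming the usual aarch64 Assembler entry points ldr/ldar/dmb; the helper name emit_volatile_read and its signature are hypothetical, since in this file the selection is actually driven by the adlc predicates described below:

    void emit_volatile_read(MacroAssembler &_masm, Register dst, Register base) {
      if (UseBarriersForVolatile) {
        _masm.ldr(dst, Address(base));  // plain load ...
        _masm.dmb(Assembler::ISHLD);    // ... ordered by an explicit load barrier
      } else {
        _masm.ldar(dst, base);          // load-acquire subsumes the barrier
      }
    }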
1091   // In order to generate the desired instruction sequence we need to
1092   // be able to identify specific 'signature' ideal graph node
1093   // sequences which i) occur as a translation of volatile reads or
1094   // writes and ii) do not occur through any other translation or
1095   // graph transformation. We can then provide alternative adlc
1096   // matching rules which translate these node sequences to the
1097   // desired machine code sequences. Selection of the alternative
1098   // rules can be implemented by predicates which identify the
1099   // relevant node sequences.
1100   //
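For instance, the load rules later in this file are paired in essentially this way; an abbreviated sketch only, with the cost, format, encoding and pipe declarations elided:

    instruct loadL(iRegLNoSp dst, memory mem)
    %{
      match(Set dst (LoadL mem));
      predicate(!needs_acquiring_load(n));
      // ... plain ldr encoding; the MemBarAcquire is translated to a dmb
    %}

    instruct loadL_volatile(iRegLNoSp dst, indirect mem)
    %{
      match(Set dst (LoadL mem));
      predicate(needs_acquiring_load(n));
      // ... ldar encoding; translation of the MemBarAcquire is inhibited
    %}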
1101   // The ideal graph generator translates a volatile read to the node
1102   // sequence
1103   //
1104   //   LoadX[mo_acquire]
1105   //   MemBarAcquire
1106   //
1107   // As a special case when using the compressed oops optimization we
1108   // may also see this variant
1109   //
1110   //   LoadN[mo_acquire]
1111   //   DecodeN
1112   //   MemBarAcquire
1113   //
1114   // A volatile write is translated to the node sequence
1115   //
1116   //   MemBarRelease
1117   //   StoreX[mo_release] {CardMark}-optional
1118   //   MemBarVolatile
1119   //


1146   // predicates need to detect its presence in order to correctly
1147   // select the desired adlc rules.
1148   //
1149   // Inlined unsafe volatile gets manifest as a somewhat different
1150   // node sequence to a normal volatile get
1151   //
1152   //   MemBarCPUOrder
1153   //        ||       \\
1154   //   MemBarAcquire LoadX[mo_acquire]
1155   //        ||
1156   //   MemBarCPUOrder
1157   //
1158   // In this case the acquire membar does not directly depend on the
1159   // load. However, we can be sure that the load is generated from an
1160   // inlined unsafe volatile get if we see it dependent on this unique
1161   // sequence of membar nodes. Similarly, given an acquire membar we
1162   // can know that it was added because of an inlined unsafe volatile
1163   // get if it is fed and feeds a cpuorder membar and if its feed
1164   // membar also feeds an acquiring load.
1165   //
1166   // So, where we can identify these volatile read and write
1167   // signatures we can choose to plant either of the above two code
1168   // sequences. For a volatile read we can simply plant a normal
1169   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1170   // also choose to inhibit translation of the MemBarAcquire and
1171   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1172   //
1173   // When we recognise a volatile store signature we can choose to
1174   // plant a dmb ish as a translation for the MemBarRelease, a
1175   // normal str<x> and then a dmb ish for the MemBarVolatile.
1176   // Alternatively, we can inhibit translation of the MemBarRelease
1177   // and MemBarVolatile and instead plant a simple stlr<x>
1178   // instruction.
1179   //
1180   // Of course, the above only applies when we see these signature
1181   // configurations. We still want to plant dmb instructions in any
1182   // other cases where we may see a MemBarAcquire, MemBarRelease or
1183   // MemBarVolatile. For example, at the end of a constructor which
1184   // writes final/volatile fields we will see a MemBarRelease
1185   // instruction and this needs a 'dmb ish' lest we risk the
1186   // constructed object being visible without making the
1187   // final/volatile field writes visible.
1188   //
1189   // n.b. the translation rules below which rely on detection of the
1190   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1191   // If we see anything other than the signature configurations we
1192   // always just translate the loads and stores to ldr<x> and str<x>
1193   // and translate acquire, release and volatile membars to the
1194   // relevant dmb instructions.
1195   //
1196 
1197   // graph traversal helpers used for volatile put/get optimization

1198 
1199   // 1) general purpose helpers
1200 
1201   // if node n is linked to a parent MemBarNode by an intervening
1202   // Control and Memory ProjNode return the MemBarNode otherwise return
1203   // NULL.
1204   //
1205   // n may only be a Load or a MemBar.
1206 
1207   MemBarNode *parent_membar(const Node *n)
1208   {
1209     Node *ctl = NULL;
1210     Node *mem = NULL;
1211     Node *membar = NULL;
1212 
1213     if (n->is_Load()) {
1214       ctl = n->lookup(LoadNode::Control);
1215       mem = n->lookup(LoadNode::Memory);
1216     } else if (n->is_MemBar()) {
1217       ctl = n->lookup(TypeFunc::Control);
1218       mem = n->lookup(TypeFunc::Memory);
1219     } else {
1220         return NULL;
1221     }
1222 
1223     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1224       return NULL;

1225 
1226     membar = ctl->lookup(0);
1227 
1228     if (!membar || !membar->is_MemBar())
1229       return NULL;

1230 
1231     if (mem->lookup(0) != membar)
1232       return NULL;

1233 
1234     return membar->as_MemBar();
1235   }
1236 
1237   // if n is linked to a child MemBarNode by intervening Control and
1238   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1239 
1240   MemBarNode *child_membar(const MemBarNode *n)
1241   {
1242     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1243     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1244 
1245     // MemBar needs to have both a Ctl and Mem projection
1246     if (! ctl || ! mem)
1247       return NULL;
1248 
1249     MemBarNode *child = NULL;
1250     Node *x;
1251 
1252     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1253       x = ctl->fast_out(i);
1254       // if we see a membar we keep hold of it. we may also see a new
1255       // arena copy of the original but it will appear later
1256       if (x->is_MemBar()) {
1257           child = x->as_MemBar();
1258           break;
1259       }
1260     }
1261 
1262     if (child == NULL)
1263       return NULL;

1264 
1265     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1266       x = mem->fast_out(i);
1267       // if we see a membar we keep hold of it. we may also see a new
1268       // arena copy of the original but it will appear later
1269       if (x == child) {
1270         return child;
1271       }
1272     }
1273     return NULL;
1274   }
1275 
1276   // helper predicate used to filter candidates for a leading memory
1277   // barrier
1278   //
1279   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1280   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1281 
1282   bool leading_membar(const MemBarNode *barrier)
1283   {
1284     int opcode = barrier->Opcode();
1285     // if this is a release membar we are ok
1286     if (opcode == Op_MemBarRelease)
1287       return true;

1288     // if it's a cpuorder membar . . .
1289     if (opcode != Op_MemBarCPUOrder)
1290       return false;

1291     // then the parent has to be a release membar
1292     MemBarNode *parent = parent_membar(barrier);
1293     if (!parent)
1294       return false;

1295     opcode = parent->Opcode();
1296     return opcode == Op_MemBarRelease;
1297   }
1298  
1299   // 2) card mark detection helper
1300 
1301   // helper predicate which can be used to detect a volatile membar
1302   // introduced as part of a conditional card mark sequence either by
1303   // G1 or by CMS when UseCondCardMark is true.
1304   //
1305   // membar can be definitively determined to be part of a card mark
1306   // sequence if and only if all the following hold
1307   //
1308   // i) it is a MemBarVolatile
1309   //
1310   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1311   // true
1312   //
1313   // iii) the node's Mem projection feeds a StoreCM node.
1314   
1315   bool is_card_mark_membar(const MemBarNode *barrier)
1316   {
1317     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark))
1318       return false;

1319 
1320     if (barrier->Opcode() != Op_MemBarVolatile)
1321       return false;

1322 
1323     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1324 
1325     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1326       Node *y = mem->fast_out(i);
1327       if (y->Opcode() == Op_StoreCM) {
1328         return true;
1329       }
1330     }
1331   
1332     return false;
1333   }
1334 
1335 
1336   // 3) helper predicates to traverse volatile put graphs which may
1337   // contain GC barrier subgraphs
1338 
1339   // Preamble
1340   // --------
1341   //
1342   // for volatile writes we can omit generating barriers and employ a
1343   // releasing store when we see a node sequence with a
1344   // leading MemBarRelease and a trailing MemBarVolatile as follows
1345   //
1346   //   MemBarRelease
1347   //  {      ||      } -- optional
1348   //  {MemBarCPUOrder}
1349   //         ||     \\
1350   //         ||     StoreX[mo_release]
1351   //         | \     /
1352   //         | MergeMem
1353   //         | /
1354   //   MemBarVolatile
1355   //
1356   // where
1357   //  || and \\ represent Ctl and Mem feeds via Proj nodes


1387   // ordering is required for both non-volatile and volatile
1388   // puts. Normally that means we need to translate a StoreCM using
1389   // the sequence
1390   //
1391   //   dmb ishst
1392   //   stlrb
1393   //
1394   // However, in the case of a volatile put if we can recognise this
1395   // configuration and plant an stlr for the object write then we can
1396   // omit the dmb and just plant an strb since visibility of the stlr
1397   // is ordered before visibility of subsequent stores. StoreCM nodes
1398   // also arise when using G1 or using CMS with conditional card
1399   // marking. In these cases (as we shall see) we don't need to insert
1400   // the dmb when translating StoreCM because there is already an
1401   // intervening StoreLoad barrier between it and the StoreP/N.
1402   //
1403   // It is also possible to perform the card mark conditionally on the
1404   // card currently being unmarked in which case the volatile put graph
1405   // will look slightly different
1406   //
1407   //   MemBarRelease
1408   //   MemBarCPUOrder___________________________________________
1409   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1410   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1411   //         | \     /                              \            |
1412   //         | MergeMem                            . . .      StoreB
1413   //         | /                                                /
1414   //         ||     /
1415   //   MemBarVolatile
1416   //
1417   // It is worth noting at this stage that both the above
1418   // configurations can be uniquely identified by checking that the
1419   // memory flow includes the following subgraph:
1420   //
1421   //   MemBarRelease
1422   //   MemBarCPUOrder
1423   //          |  \      . . .
1424   //          |  StoreX[mo_release]  . . .
1425   //          |   /
1426   //         MergeMem
1427   //          |
1428   //   MemBarVolatile
1429   //
1430   // This is referred to as a *normal* subgraph. It can easily be
1431   // detected starting from any candidate MemBarRelease,
1432   // StoreX[mo_release] or MemBarVolatile.
1433   //
1434   // the code below uses two helper predicates, leading_to_normal and
1435   // normal_to_leading to identify this configuration, one validating
1436   // the layout starting from the top membar and searching down and
1437   // the other validating the layout starting from the lower membar
1438   // and searching up.
1439   //
1440   // There are two special case GC configurations when a normal graph
1441   // may not be generated: when using G1 (which always employs a
1442   // conditional card mark); and when using CMS with conditional card
1443   // marking configured. These GCs are both concurrent rather than
1444   // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1445   // graph between the leading and trailing membar nodes, in
1446   // particular enforcing stronger memory serialisation between the
1447   // object put and the corresponding conditional card mark. CMS
1448   // employs a post-write GC barrier while G1 employs both a pre- and
1449   // post-write GC barrier. Of course the extra nodes may be absent --
1450   // they are only inserted for object puts. This significantly
1451   // complicates the task of identifying whether a MemBarRelease,
1452   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1453   // when using these GC configurations (see below).


1454   //
1455   // In both cases the post-write subtree includes an auxiliary
1456   // MemBarVolatile (StoreLoad barrier) separating the object put and
1457   // the read of the corresponding card. This poses two additional
1458   // problems.
1459   //
1460   // Firstly, a card mark MemBarVolatile needs to be distinguished
1461   // from a normal trailing MemBarVolatile. Resolving this first
1462   // problem is straightforward: a card mark MemBarVolatile always
1463   // projects a Mem feed to a StoreCM node and that is a unique marker
1464   //
1465   //      MemBarVolatile (card mark)
1466   //       C |    \     . . .
1467   //         |   StoreCM   . . .
1468   //       . . .
1469   //
1470   // The second problem is how the code generator is to translate the
1471   // card mark barrier? It always needs to be translated to a "dmb
1472   // ish" instruction whether or not it occurs as part of a volatile
1473   // put. A StoreLoad barrier is needed after the object put to ensure
1474   // i) visibility to GC threads of the object put and ii) visibility
1475   // to the mutator thread of any card clearing write by a GC
1476   // thread. Clearly a normal store (str) will not guarantee this
1477   // ordering but neither will a releasing store (stlr). The latter
1478   // guarantees that the object put is visible but does not guarantee
1479   // that writes by other threads have also been observed.
1480   // 
1481   // So, returning to the task of translating the object put and the
1482   // leading/trailing membar nodes: what do the non-normal node graphs
1483   // look like for these 2 special cases? and how can we determine the
1484   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1485   // in both normal and non-normal cases?
1486   //
1487   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1488   // which selects conditional execution based on the value loaded
1489   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1490   // intervening StoreLoad barrier (MemBarVolatile).
1491   //
1492   // So, with CMS we may see a node graph which looks like this

1493   //
1494   //   MemBarRelease
1495   //   MemBarCPUOrder_(leading)__________________
1496   //     C |    M \       \\                   C \
1497   //       |       \    StoreN/P[mo_release]  CastP2X
1498   //       |    Bot \    /
1499   //       |       MergeMem
1500   //       |         /
1501   //      MemBarVolatile (card mark)
1502   //     C |  ||    M |
1503   //       | LoadB    |
1504   //       |   |      |
1505   //       | Cmp      |\
1506   //       | /        | \
1507   //       If         |  \
1508   //       | \        |   \
1509   // IfFalse  IfTrue  |    \
1510   //       \     / \  |     \
1511   //        \   / StoreCM    |
1512   //         \ /      |      |
1513   //        Region   . . .   |
1514   //          | \           /
1515   //          |  . . .  \  / Bot
1516   //          |       MergeMem
1517   //          |          |
1518   //        MemBarVolatile (trailing)
1519   //
1520   // The first MergeMem merges the AliasIdxBot Mem slice from the
1521   // leading membar and the oopptr Mem slice from the Store into the
1522   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1523   // Mem slice from the card mark membar and the AliasIdxRaw slice
1524   // from the StoreCM into the trailing membar (n.b. the latter
1525   // proceeds via a Phi associated with the If region).
1526   //
1527   // G1 is quite a lot more complicated. The nodes inserted on behalf
1528   // of G1 may comprise: a pre-write graph which adds the old value to
1529   // the SATB queue; the releasing store itself; and, finally, a
1530   // post-write graph which performs a card mark.
1531   //
1532   // The pre-write graph may be omitted, but only when the put is
1533   // writing to a newly allocated (young gen) object and then only if
1534   // there is a direct memory chain to the Initialize node for the
1535   // object allocation. This will not happen for a volatile put since
1536   // any memory chain passes through the leading membar.
1537   //
1538   // The pre-write graph includes a series of 3 If tests. The outermost
1539   // If tests whether SATB is enabled (no else case). The next If tests
1540   // whether the old value is non-NULL (no else case). The third tests
1541   // whether the SATB queue index is > 0, if so updating the queue. The
1542   // else case for this third If calls out to the runtime to allocate a
1543   // new queue buffer.
1544   //
1545   // So with G1 the pre-write and releasing store subgraph looks like
1546   // this (the nested Ifs are omitted).


1558   //       |                 \              |
1559   //       |    . . .         \             |
1560   //       | /       | /       |            |
1561   //      Region  Phi[M]       |            |
1562   //       | \       |         |            |
1563   //       |  \_____ | ___     |            |
1564   //     C | C \     |   C \ M |            |
1565   //       | CastP2X | StoreN/P[mo_release] |
1566   //       |         |         |            |
1567   //     C |       M |       M |          M |
1568   //        \        |         |           /
1569   //                  . . . 
1570   //          (post write subtree elided)
1571   //                    . . .
1572   //             C \         M /
1573   //         MemBarVolatile (trailing)
1574   //
1575   // n.b. the LoadB in this subgraph is not the card read -- it's a
1576   // read of the SATB queue active flag.
1577   //
1578   // The G1 post-write subtree is also optional, this time when the
1579   // new value being written is either null or can be identified as a
1580   // newly allocated (young gen) object with no intervening control
1581   // flow. The latter cannot happen but the former may, in which case
1582   // the card mark membar is omitted and the memory feeds from the
1583   // leading membar and the StoreN/P are merged direct into the
1584   // trailing membar as per the normal subgraph. So, the only special
1585   // case which arises is when the post-write subgraph is generated.
1586   //
1587   // The kernel of the post-write G1 subgraph is the card mark itself
1588   // which includes a card mark memory barrier (MemBarVolatile), a
1589   // card test (LoadB), and a conditional update (If feeding a
1590   // StoreCM). These nodes are surrounded by a series of nested Ifs
1591   // which try to avoid doing the card mark. The top level If skips if
1592   // the object reference does not cross regions (i.e. it tests if
1593   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1594   // need not be recorded. The next If, which skips on a NULL value,
1595   // may be absent (it is not generated if the type of value is >=
1596   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1597   // checking if card_val != young).  n.b. although this test requires
1598   // a pre-read of the card it can safely be done before the StoreLoad
1599   // barrier. However that does not bypass the need to reread the card
1600   // after the barrier.
1601   //
1602   //                (pre-write subtree elided)
1603   //        . . .                  . . .    . . .  . . .


1651   //    \            MergeMem 
1652   //     \            /
1653   //     MemBarVolatile
1654   //
1655   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1656   // from the leading membar and the oopptr Mem slice from the Store
1657   // into the card mark membar i.e. the memory flow to the card mark
1658   // membar still looks like a normal graph.
1659   //
1660   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1661   // Mem slices (from the StoreCM and other card mark queue stores).
1662   // However in this case the AliasIdxBot Mem slice does not come
1663   // direct from the card mark membar. It is merged through a series
1664   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1665   // from the leading membar with the Mem feed from the card mark
1666   // membar. Each Phi corresponds to one of the Ifs which may skip
1667   // around the card mark membar. So when the If implementing the NULL
1668   // value check has been elided the total number of Phis is 2
1669   // otherwise it is 3.
1670   //
1671   // So, the upshot is that in all cases the volatile put graph will
1672   // include a *normal* memory subgraph betwen the leading membar and
1673   // its child membar. When that child is not a card mark membar then
1674   // it marks the end of a volatile put subgraph. If the child is a
1675   // card mark membar then the normal subgraph will form part of a
1676   // volatile put subgraph if and only if the child feeds an
1677   // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That
1678   // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging
1679   // the leading barrier memory flow (for G1).


1680   // 
1681   // The predicates controlling generation of instructions for store
1682   // and barrier nodes employ a few simple helper functions (described
1683   // below) which identify the presence or absence of these subgraph
1684   // configurations and provide a means of traversing from one node in
1685   // the subgraph to another.
1686 
1687   // leading_to_normal
1688   //
1689   // graph traversal helper which detects the normal case Mem feed
1690   // from a release membar (or, optionally, its cpuorder child) to a
1691   // dependent volatile membar i.e. it ensures that the following Mem
1692   // flow subgraph is present.
1693   //
1694   //   MemBarRelease
1695   //   MemBarCPUOrder
1696   //          |  \      . . .
1697   //          |  StoreN/P[mo_release]  . . .
1698   //          |   /
1699   //         MergeMem
1700   //          |
1701   //   MemBarVolatile
1702   //
1703   // if the correct configuration is present returns the volatile
1704   // membar otherwise NULL.
1705   //
1706   // the input membar is expected to be either a cpuorder membar or a
1707   // release membar. in the latter case it should not have a cpuorder
1708   // membar child.
1709   //
1710   // the returned membar may be a card mark membar rather than a
1711   // trailing membar.
1712 
1713   MemBarNode *leading_to_normal(MemBarNode *leading)
1714   {
1715     assert((leading->Opcode() == Op_MemBarRelease ||
1716             leading->Opcode() == Op_MemBarCPUOrder),
1717            "expecting a volatile or cpuroder membar!");
1718 
1719     // check the mem flow
1720     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1721 
1722     if (!mem)
1723       return NULL;

1724 
1725     Node *x = NULL;
1726     StoreNode * st = NULL;

1727     MergeMemNode *mm = NULL;
1728 
1729     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1730       x = mem->fast_out(i);
1731       if (x->is_MergeMem()) {
1732         if (mm != NULL)
1733           return NULL;

1734         // two merge mems is one too many
1735         mm = x->as_MergeMem();
1736       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1737         // two releasing stores is one too many
1738         if (st != NULL)
1739           return NULL;

1740         st = x->as_Store();
1741       }
1742     }
1743 
1744     if (!mm || !st)

1745       return NULL;

1746 
1747     bool found = false;
1748     // ensure the store feeds the merge
1749     for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1750       if (st->fast_out(i) == mm) {
1751         found = true;
1752         break;
1753       }
1754     }
1755 
1756     if (!found)
1757       return NULL;


1758 
1759     MemBarNode *mbvol = NULL;
1760     // ensure the merge feeds a volatile membar
1761     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1762       x = mm->fast_out(i);
1763       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1764         mbvol = x->as_MemBar();
1765         break;
1766       }
1767     }
1768 
1769     return mbvol;
1770   }
1771 
1772   // normal_to_leading
1773   //
1774   // graph traversal helper which detects the normal case Mem feed
1775   // from either a card mark or a trailing membar to a preceding
1776   // release membar (optionally its cpuorder child) i.e. it ensures
1777   // that the following Mem flow subgraph is present.
1778   //
1779   //   MemBarRelease
1780   //   MemBarCPUOrder {leading}
1781   //          |  \      . . .
1782   //          |  StoreN/P[mo_release]  . . .
1783   //          |   /
1784   //         MergeMem
1785   //          |
1786   //   MemBarVolatile
1787   //
1788   // this predicate checks for the same flow as the previous predicate
1789   // but starting from the bottom rather than the top.
1790   //
1791   // if the configuration is present returns the cpuorder membar for
1792   // preference or when absent the release membar otherwise NULL.
1793   //
1794   // n.b. the input membar is expected to be a MemBarVolatile but
1795   // need not be a card mark membar.
1796 
1797   MemBarNode *normal_to_leading(const MemBarNode *barrier)
1798   {
1799     // input must be a volatile membar
1800     assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar");


1801     Node *x;
1802 
1803     // the Mem feed to the membar should be a merge
1804     x = barrier->in(TypeFunc::Memory);
1805     if (!x->is_MergeMem())
1806       return NULL;
1807 
1808     MergeMemNode *mm = x->as_MergeMem();
1809 
1810     // the AliasIdxBot slice should be another MemBar projection
1811     x = mm->in(Compile::AliasIdxBot);


1812     // ensure this is a non control projection
1813     if (!x->is_Proj() || x->is_CFG())
1814       return NULL;

1815     // if it is fed by a membar that's the one we want
1816     x = x->in(0);
1817 
1818     if (!x->is_MemBar())
1819       return NULL;

1820 
1821     MemBarNode *leading = x->as_MemBar();
1822     // reject invalid candidates
1823     if (!leading_membar(leading))
1824       return NULL;

1825 
1826     // ok, we have a leading ReleaseMembar, now for the sanity clauses
1827 
1828     // the leading membar must feed Mem to a releasing store
1829     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1830     StoreNode *st = NULL;

1831     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1832       x = mem->fast_out(i);
1833       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1834         st = x->as_Store();
1835         break;
1836       }
1837     }
1838     if (st == NULL)


1839       return NULL;

1840 
1841     // the releasing store has to feed the same merge
1842     for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1843       if (st->fast_out(i) == mm)
1844         return leading;
1845     }


1846 
1847     return NULL;
1848   }
1849 
1850   // card_mark_to_trailing
1851   //
1852   // graph traversal helper which detects extra, non-normal Mem feed
1853   // from a card mark volatile membar to a trailing membar i.e. it
1854   // ensures that one of the following three GC post-write Mem flow
1855   // subgraphs is present.
1856   //
1857   // 1)
1858   //     . . .
1859   //       |
1860   //   MemBarVolatile (card mark)
1861   //      |          |     
1862   //      |        StoreCM
1863   //      |          |
1864   //      |        . . .
1865   //  Bot |  / 
1866   //   MergeMem 
1867   //      |
1868   //   MemBarVolatile (trailing)
1869   //
1870   //
1871   // 2)
1872   //   MemBarRelease/CPUOrder (leading)
1873   //    |
1874   //    | 
1875   //    |\       . . .
1876   //    | \        | 
1877   //    |  \  MemBarVolatile (card mark) 
1878   //    |   \   |     |
1879   //     \   \  |   StoreCM    . . .
1880   //      \   \ |
1881   //       \  Phi
1882   //        \ /
1883   //        Phi  . . .
1884   //     Bot |   /
1885   //       MergeMem
1886   //         |
1887   //   MemBarVolatile (trailing)

1888   //
1889   // 3)
1890   //   MemBarRelease/CPUOrder (leading)
1891   //    |
1892   //    |\
1893   //    | \
1894   //    |  \      . . .
1895   //    |   \       |
1896   //    |\   \  MemBarVolatile (card mark)
1897   //    | \   \   |     |
1898   //    |  \   \  |   StoreCM    . . .
1899   //    |   \   \ |
1900   //     \   \  Phi
1901   //      \   \ /  
1902   //       \  Phi
1903   //        \ /
1904   //        Phi  . . .
1905   //     Bot |   /
1906   //       MergeMem
1907   //         |
1908   //   MemBarVolatile (trailing)

1909   //
1910   // configuration 1 is only valid if UseConcMarkSweepGC &&
1911   // UseCondCardMark
1912   //
1913   // configurations 2 and 3 are only valid if UseG1GC.
1914   //
1915   // if a valid configuration is present returns the trailing membar
1916   // otherwise NULL.
1917   //
1918   // n.b. the supplied membar is expected to be a card mark
1919   // MemBarVolatile i.e. the caller must ensure the input node has the
1920   // correct opcode and feeds Mem to a StoreCM node
1921 
1922   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
1923   {
1924     // input must be a card mark volatile membar
1925     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
1926 
1927     Node *feed = barrier->proj_out(TypeFunc::Memory);
1928     Node *x;


1938         x = feed->fast_out(i);
1939         // the correct Phi will be merging a Bot memory slice
1940         if (x->is_MergeMem()) {
1941           mm = x->as_MergeMem();
1942           break;
1943         }
1944       }
1945       if (mm) {
1946         retry_feed = false;
1947       } else if (UseG1GC && phicount++ < MAX_PHIS) {
1948         // the barrier may feed indirectly via one or two Phi nodes
1949         PhiNode *phi = NULL;
1950         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
1951           x = feed->fast_out(i);
1952           // the correct Phi will be merging a Bot memory slice
1953           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
1954             phi = x->as_Phi();
1955             break;
1956           }
1957         }
1958         if (!phi)
1959           return NULL;

1960         // look for another merge below this phi
1961         feed = phi;
1962       } else {
1963         // couldn't find a merge
1964         return NULL;
1965       }
1966     }
1967 
1968     // sanity check this feed turns up as the expected slice
1969     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
1970 
1971     MemBarNode *trailing = NULL;
1972     // be sure we have a volatile membar below the merge
1973     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1974       x = mm->fast_out(i);
1975       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1976         trailing = x->as_MemBar();
1977         break;
1978       }
1979     }
1980 
1981     return trailing;
1982   }
1983 
1984   // trailing_to_card_mark
1985   //
1986   // graph traversal helper which detects extra, non-normal Mem feed
1987   // from a trailing membar to a preceding card mark volatile membar
1988   // i.e. it identifies whether one of the three possible extra GC
1989   // post-write Mem flow subgraphs is present
1990   //
1991   // this predicate checks for the same flow as the previous predicate
1992   // but starting from the bottom rather than the top.
1993   //
1994   // if the configuration is present returns the card mark membar
1995   // otherwise NULL
1996 
1997   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
1998   {
1999     assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
2000 
2001     Node *x = trailing->in(TypeFunc::Memory);
2002     // the Mem feed to the membar should be a merge
2003     if (!x->is_MergeMem())

2004       return NULL;

2005 
2006     MergeMemNode *mm = x->as_MergeMem();
2007 
2008     x = mm->in(Compile::AliasIdxBot);
2009     // with G1 we may possibly see a Phi or two before we see a Memory
2010     // Proj from the card mark membar
2011 
2012     const int MAX_PHIS = 3;     // max phis we will search through
2013     int phicount = 0;           // current search count
2014 
2015     bool retry_feed = !x->is_Proj();
2016 
2017     while (retry_feed) {
2018       if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2019         PhiNode *phi = x->as_Phi();
2020         ProjNode *proj = NULL;
2021         PhiNode *nextphi = NULL;
2022         bool found_leading = false;
2023         for (uint i = 1; i < phi->req(); i++) {
2024           x = phi->in(i);


2037         }
2038         // if we found a correct looking proj then retry from there
2039         // otherwise we must see a leading and a phi or this is the
2040         // wrong config
2041         if (proj != NULL) {
2042           x = proj;
2043           retry_feed = false;
2044         } else if (found_leading && nextphi != NULL) {
2045           // retry from this phi to check phi2
2046           x = nextphi;
2047         } else {
2048           // not what we were looking for
2049           return NULL;
2050         }
2051       } else {
2052         return NULL;
2053       }
2054     }
2055     // the proj has to come from the card mark membar
2056     x = x->in(0);
2057     if (!x->is_MemBar())
2058       return NULL;

2059 
2060     MemBarNode *card_mark_membar = x->as_MemBar();
2061 
2062     if (!is_card_mark_membar(card_mark_membar))
2063       return NULL;

2064 
2065     return card_mark_membar;
2066   }
2067 
2068   // trailing_to_leading
2069   //
2070   // graph traversal helper which checks the Mem flow up the graph
2071   // from a (non-card mark) volatile membar attempting to locate and
2072   // return an associated leading membar. it first looks for a
2073   // subgraph in the normal configuration (relying on helper
2074   // normal_to_leading). failing that it then looks for one of the
2075   // possible post-write card mark subgraphs linking the trailing node
2076   // to the card mark membar (relying on helper
2077   // trailing_to_card_mark), and then checks that the card mark membar
2078   // is fed by a leading membar (once again relying on auxiliary
2079   // predicate normal_to_leading).
2080   //
2081   // if the configuration is valid returns the cpuorder membar for
2082   // preference or when absent the release membar otherwise NULL.
2083   //
2084   // n.b. the input membar is expected to be a volatile membar but
2085   // must *not* be a card mark membar.

2086 
2087   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2088   {
2089     assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
2090 
2091     MemBarNode *leading = normal_to_leading(trailing);
2092 
2093     if (leading)
2094       return leading;
2095 
2096     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2097 
2098     if (!card_mark_membar)
2099       return NULL;

2100 
2101     return normal_to_leading(card_mark_membar);
2102   }
2103 
2104   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2105 
2106 bool unnecessary_acquire(const Node *barrier)
2107 {
2108   // assert barrier->is_MemBar();
2109   if (UseBarriersForVolatile)

2110     // we need to plant a dmb
2111     return false;

2112 
2113   // a volatile read derived from bytecode (or also from an inlined
2114   // SHA field read via LibraryCallKit::load_field_from_object)
2115   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2116   // with a bogus read dependency on its preceding load. so in those
2117   // cases we will find the load node at the PARMS offset of the
2118   // acquire membar.  n.b. there may be an intervening DecodeN node.
2119   //
2120   // a volatile load derived from an inlined unsafe field access
2121   // manifests as a cpuorder membar with Ctl and Mem projections
2122   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2123   // acquire then feeds another cpuorder membar via Ctl and Mem
2124   // projections. The load has no output dependency on these trailing
2125   // membars because subsequent nodes inserted into the graph take
2126   // their control feed from the final membar cpuorder meaning they
2127   // are all ordered after the load.
2128 
2129   Node *x = barrier->lookup(TypeFunc::Parms);
2130   if (x) {
2131     // we are starting from an acquire and it has a fake dependency
2132     //
2133     // need to check for
2134     //
2135     //   LoadX[mo_acquire]
2136     //   {  |1   }
2137     //   {DecodeN}
2138     //      |Parms
2139     //   MemBarAcquire*
2140     //
2141     // where * tags node we were passed
2142     // and |k means input k
2143     if (x->is_DecodeNarrowPtr())
2144       x = x->in(1);

2145 
2146     return (x->is_Load() && x->as_Load()->is_acquire());
2147   }
2148   
2149   // now check for an unsafe volatile get
2150 
2151   // need to check for
2152   //
2153   //   MemBarCPUOrder
2154   //        ||       \\
2155   //   MemBarAcquire* LoadX[mo_acquire]
2156   //        ||
2157   //   MemBarCPUOrder
2158   //
2159   // where * tags node we were passed
2160   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2161 
2162   // check for a parent MemBarCPUOrder
2163   ProjNode *ctl;
2164   ProjNode *mem;
2165   MemBarNode *parent = parent_membar(barrier);
2166   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2167     return false;
2168   ctl = parent->proj_out(TypeFunc::Control);
2169   mem = parent->proj_out(TypeFunc::Memory);
2170   if (!ctl || !mem)
2171     return false;

2172   // ensure the proj nodes both feed a LoadX[mo_acquire]
2173   LoadNode *ld = NULL;
2174   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2175     x = ctl->fast_out(i);
2176     // if we see a load we keep hold of it and stop searching
2177     if (x->is_Load()) {
2178       ld = x->as_Load();
2179       break;
2180     }
2181   }
2182   // it must be an acquiring load
2183   if (! ld || ! ld->is_acquire())
2184     return false;
2185   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2186     x = mem->fast_out(i);
2187     // if we see the same load we drop it and stop searching
2188     if (x == ld) {
2189       ld = NULL;
2190       break;
2191     }
2192   }
2193   // we must have dropped the load
2194   if (ld)
2195     return false;
2196   // check for a child cpuorder membar
2197   MemBarNode *child  = child_membar(barrier->as_MemBar());
2198   if (!child || child->Opcode() != Op_MemBarCPUOrder)
2199     return false;
2200 
2201   return true;
2202 }
2203 
2204 bool needs_acquiring_load(const Node *n)
2205 {
2206   // assert n->is_Load();
2207   if (UseBarriersForVolatile)
2208     // we use a normal load and a dmb
2209     return false;

2210 
2211   LoadNode *ld = n->as_Load();
2212 
2213   if (!ld->is_acquire())
2214     return false;

2215 
2216   // check if this load is feeding an acquire membar
2217   //
2218   //   LoadX[mo_acquire]
2219   //   {  |1   }
2220   //   {DecodeN}
2221   //      |Parms
2222   //   MemBarAcquire*
2223   //
2224   // where * tags node we were passed
2225   // and |k means input k
2226 
2227   Node *start = ld;
2228   Node *mbacq = NULL;
2229 
2230   // if we hit a DecodeNarrowPtr we reset the start node and restart
2231   // the search through the outputs
2232  restart:
2233 
2234   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {


2244   }
2245 
2246   if (mbacq) {
2247     return true;
2248   }
2249 
2250   // now check for an unsafe volatile get
2251 
2252   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2253   //
2254   //     MemBarCPUOrder
2255   //        ||       \\
2256   //   MemBarAcquire* LoadX[mo_acquire]
2257   //        ||
2258   //   MemBarCPUOrder
2259 
2260   MemBarNode *membar;
2261 
2262   membar = parent_membar(ld);
2263 
2264   if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
2265     return false;

2266 
2267   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2268 
2269   membar = child_membar(membar);
2270 
2271   if (!membar || membar->Opcode() != Op_MemBarAcquire)
2272     return false;

2273 
2274   membar = child_membar(membar);
2275   
2276   if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
2277     return false;

2278 
2279   return true;
2280 }
2281 
2282 bool unnecessary_release(const Node *n)
2283 {
2284   assert((n->is_MemBar() &&
2285           n->Opcode() == Op_MemBarRelease),
2286          "expecting a release membar");
2287 
2288   if (UseBarriersForVolatile)
2289     // we need to plant a dmb
2290     return false;

2291 
2292   // if there is a dependent CPUOrder barrier then use that as the
2293   // leading
2294 
2295   MemBarNode *barrier = n->as_MemBar();
2296   // check for an intervening cpuorder membar
2297   MemBarNode *b = child_membar(barrier);
2298   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2299     // ok, so start the check from the dependent cpuorder barrier
2300     barrier = b;
2301   }
2302 
2303   // must start with a normal feed
2304   MemBarNode *child_barrier = leading_to_normal(barrier);
2305 
2306   if (!child_barrier)
2307     return false;

2308 
2309   if (!is_card_mark_membar(child_barrier))
2310     // this is the trailing membar and we are done
2311     return true;

2312 
2313   // must be sure this card mark feeds a trailing membar
2314   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2315   return (trailing != NULL);
2316 }
2317 
2318 bool unnecessary_volatile(const Node *n)
2319 {
2320   // assert n->is_MemBar();
2321   if (UseBarriersForVolatile)
2322     // we need to plant a dmb
2323     return false;

2324 
2325   MemBarNode *mbvol = n->as_MemBar();
2326 
2327   // first we check if this is part of a card mark. if so then we have
2328   // to generate a StoreLoad barrier
2329   
2330   if (is_card_mark_membar(mbvol))
2331       return false;

2332 
2333   // ok, if it's not a card mark then we still need to check if it is
2334   // a trailing membar of a volatile put graph.
2335 
2336   return (trailing_to_leading(mbvol) != NULL);
2337 }
2338 
2339 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2340 
2341 bool needs_releasing_store(const Node *n)
2342 {
2343   // assert n->is_Store();
2344   if (UseBarriersForVolatile)
2345     // we use a normal store and dmb combination
2346     return false;

2347 
2348   StoreNode *st = n->as_Store();
2349 
2350   // the store must be marked as releasing
2351   if (!st->is_release())
2352     return false;

2353 
2354   // the store must be fed by a membar
2355 
2356   Node *x = st->lookup(StoreNode::Memory);
2357 
2358   if (! x || !x->is_Proj())
2359     return false;

2360 
2361   ProjNode *proj = x->as_Proj();
2362 
2363   x = proj->lookup(0);
2364 
2365   if (!x || !x->is_MemBar())
2366     return false;

2367 
2368   MemBarNode *barrier = x->as_MemBar();
2369 
2370   // if the barrier is a release membar or a cpuorder membar fed by a
2371   // release membar then we need to check whether that forms part of a
2372   // volatile put graph.
2373 
2374   // reject invalid candidates
2375   if (!leading_membar(barrier))
2376     return false;

2377 
2378   // does this lead a normal subgraph?
2379   MemBarNode *mbvol = leading_to_normal(barrier);
2380 
2381   if (!mbvol)
2382     return false;

2383 
2384   // all done unless this is a card mark
2385   if (!is_card_mark_membar(mbvol))
2386     return true;

2387   
2388   // we found a card mark -- just make sure we have a trailing barrier
2389 
2390   return (card_mark_to_trailing(mbvol) != NULL);
2391 }
2392 
2393 // predicate controlling translation of StoreCM
2394 //
2395 // returns true if the StoreStore barrier normally needed before the
2396 // card write can be omitted, otherwise false
2397 
2398 bool unnecessary_storestore(const Node *storecm)
2399 {
2400   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2401 
2402   // we only ever need to generate a dmb ishst between an object put
2403   // and the associated card mark when we are using CMS without
2404   // conditional card marking
2405 
2406   if (!UseConcMarkSweepGC || UseCondCardMark)
2407     return true;

2408 
2409   // if we are implementing volatile puts using barriers then the
2410   // object put is implemented as an str so we must insert the dmb ishst
2411 
2412   if (UseBarriersForVolatile)
2413     return false;

2414 
2415   // we can omit the dmb ishst if this StoreCM is part of a volatile
2416   // put because in that case the put will be implemented by stlr
2417   //
2418   // we need to check for a normal subgraph feeding this StoreCM.
2419   // that means the StoreCM must be fed Memory from a leading membar,
2420   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2421   // leading membar must be part of a normal subgraph
2422 
2423   Node *x = storecm->in(StoreNode::Memory);
2424 
2425   if (!x->is_Proj())
2426     return false;

2427 
2428   x = x->in(0);
2429 
2430   if (!x->is_MemBar())
2431     return false;

2432 
2433   MemBarNode *leading = x->as_MemBar();
2434 
2435   // reject invalid candidates
2436   if (!leading_membar(leading))
2437     return false;

2438 
2439   // we can omit the StoreStore if it is the head of a normal subgraph
2440   return (leading_to_normal(leading) != NULL);
2441 }
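By way of illustration, a StoreCM rule gated on this predicate would be wired up roughly as follows; an abbreviated sketch only, with the cost, format, encoding and pipe declarations elided:

    instruct storeimmCM0(immI0 zero, memory mem)
    %{
      match(Set mem (StoreCM mem zero));
      predicate(unnecessary_storestore(n));
      // ... encodes as a plain strb zr, [mem] with no preceding dmb ishst
    %}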
2442 
2443 
2444 #define __ _masm.
2445 
2446 // advance declarations for helper functions to convert register
2447 // indices to register objects
2448 
2449 // the ad file has to provide implementations of certain methods
2450 // expected by the generic code
2451 //
2452 // REQUIRED FUNCTIONALITY
2453 
2454 //=============================================================================
2455 
2456 // !!!!! Special hack to get all types of calls to specify the byte offset
2457 //       from the start of the call to the point where the return address


8348 // n.b. storeIConditional was not used anywhere by AArch64.
8349 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8350 %{
8351   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8352 
8353   ins_cost(VOLATILE_REF_COST);
8354 
8355   format %{
8356     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8357     "cmpw rscratch1, zr\t# EQ on successful write"
8358   %}
8359 
8360   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8361 
8362   ins_pipe(pipe_slow);
8363 %}
8364 
8365 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8366 // can't match them
8367 
8368 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8369 
8370   match(Set res (CompareAndSwapI mem (Binary oldval newval)));

8371 
8372   effect(KILL cr);
8373 
8374  format %{
8375     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8376     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8377  %}
8378 
8379  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8380             aarch64_enc_cset_eq(res));
8381 
8382   ins_pipe(pipe_slow);
8383 %}
8384 
8385 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8386 
8387   match(Set res (CompareAndSwapL mem (Binary oldval newval)));

8388 
8389   effect(KILL cr);
8390 
8391  format %{
8392     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8393     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8394  %}
8395 
8396  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8397             aarch64_enc_cset_eq(res));
8398 
8399   ins_pipe(pipe_slow);
8400 %}
8401 
8402 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8403 
8404   match(Set res (CompareAndSwapP mem (Binary oldval newval)));

8405 
8406   effect(KILL cr);
8407 
8408  format %{
8409     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8410     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8411  %}
8412 
8413  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8414             aarch64_enc_cset_eq(res));
8415 
8416   ins_pipe(pipe_slow);
8417 %}
8418 
8419 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8420 
8421   match(Set res (CompareAndSwapN mem (Binary oldval newval)));

8422 
8423   effect(KILL cr);
8424 
8425  format %{
8426     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8427     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8428  %}
8429 
8430  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8431             aarch64_enc_cset_eq(res));
8432 
8433   ins_pipe(pipe_slow);
8434 %}
8435 
8436 
8437 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
8438   match(Set prev (GetAndSetI mem newv));
8439   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
8440   ins_encode %{
8441     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8442   %}
8443   ins_pipe(pipe_serial);
8444 %}
8445 
8446 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
8447   match(Set prev (GetAndSetL mem newv));
8448   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8449   ins_encode %{
8450     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));




1022   static int emit_exception_handler(CodeBuffer &cbuf);
1023   static int emit_deopt_handler(CodeBuffer& cbuf);
1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036 
1037   MemBarNode *parent_membar(const Node *n);
1038   MemBarNode *child_membar(const MemBarNode *n);
1039   bool leading_membar(const MemBarNode *barrier);
1040 
1041   bool is_card_mark_membar(const MemBarNode *barrier);
1042   bool is_CAS(int opcode);
1043 
1044   MemBarNode *leading_to_normal(MemBarNode *leading);
1045   MemBarNode *normal_to_leading(const MemBarNode *barrier);
1046   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1047   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1048   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1049 
1050   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1051 
1052   bool unnecessary_acquire(const Node *barrier);
1053   bool needs_acquiring_load(const Node *load);
1054 
1055   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1056 
1057   bool unnecessary_release(const Node *barrier);
1058   bool unnecessary_volatile(const Node *barrier);
1059   bool needs_releasing_store(const Node *store);
1060 
1061   // predicate controlling translation of CompareAndSwapX
1062   bool needs_acquiring_load_exclusive(const Node *load);
1063 
1064   // predicate controlling translation of StoreCM
1065   bool unnecessary_storestore(const Node *storecm);
1066 %}
1067 
1068 source %{
1069 
1070   // Optimization of volatile gets and puts
1071   // -------------------------------------
1072   //
1073   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1074   // use to implement volatile reads and writes. For a volatile read
1075   // we simply need
1076   //
1077   //   ldar<x>
1078   //
1079   // and for a volatile write we need
1080   //
1081   //   stlr<x>
1082   // 
1083   // Alternatively, we can implement them by pairing a normal
1084   // load/store with a memory barrier. For a volatile read we need
1085   // 
1086   //   ldr<x>
1087   //   dmb ishld
1088   //
1089   // for a volatile write
1090   //
1091   //   dmb ish
1092   //   str<x>
1093   //   dmb ish
1094   //
1095   // We can also use ldaxr and stlxr to implement compare and swap (CAS)
1096   // sequences. These are normally translated to an instruction
1097   // sequence like the following
1098   //
1099   //   dmb      ish
1100   // retry:
1101   //   ldxr<x>   rval raddr
1102   //   cmp       rval rold
1103   //   b.ne done
1104   //   stlxr<x>  rval, rnew, raddr
1105   //   cbnz      rval retry
1106   // done:
1107   //   cset      r0, eq
1108   //   dmb ishld
1109   //
1110   // Note that the exclusive store is already using an stlxr
1111   // instruction. That is required to ensure visibility to other
1112   // threads of the exclusive write (assuming it succeeds) before that
1113   // of any subsequent writes.
1114   //
1115   // The following instruction sequence is an improvement on the above
1116   //
1117   // retry:
1118   //   ldaxr<x>  rval raddr
1119   //   cmp       rval rold
1120   //   b.ne done
1121   //   stlxr<x>  rval, rnew, raddr
1122   //   cbnz      rval retry
1123   // done:
1124   //   cset      r0, eq
1125   //
1126   // We don't need the leading dmb ish since the stlxr guarantees
1127   // visibility of prior writes in the case that the swap is
1128   // successful. Crucially we don't have to worry about the case where
1129   // the swap is not successful since no valid program should be
1130   // relying on visibility of prior changes by the attempting thread
1131   // in the case where the CAS fails.
1132   //
1133   // Similarly, we don't need the trailing dmb ishld if we substitute
1134   // an ldaxr instruction since that will provide all the guarantees we
1135   // require regarding observation of changes made by other threads
1136   // before any change to the CAS address observed by the load.
1137   //
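  // Putting those two observations together, an encoding for the
  // improved sequence might be shaped like the following sketch. The
  // helper name and register assignments are hypothetical; the real
  // encoding is supplied by an enc_class definition later in this
  // file.
  static void sketch_emit_cmpxchg_acq(MacroAssembler &masm, Register addr,
                                      Register oldv, Register newv,
                                      Register tmp) {
    Label retry_load, done;
    masm.bind(retry_load);
    // load-acquire exclusive subsumes the trailing dmb ishld
    masm.ldaxr(tmp, addr);
    masm.cmp(tmp, oldv);
    masm.br(Assembler::NE, done);
    // store-release exclusive subsumes the leading dmb ish
    masm.stlxr(tmp, newv, addr);
    // a non-zero status means the store exclusive failed, so retry
    masm.cbnzw(tmp, retry_load);
    masm.bind(done);
  }
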
1138   // In order to generate the desired instruction sequence we need to
1139   // be able to identify specific 'signature' ideal graph node
1140   // sequences which i) occur as a translation of volatile reads,
1141   // writes or CAS operations and ii) do not occur through any other
1142   // translation or graph transformation. We can then provide
1143   // alternative adlc matching rules which translate these node
1144   // sequences to the desired machine code sequences. Selection of the
1145   // alternative rules can be implemented by predicates which identify
1146   // the relevant node sequences.
1147   //
1148   // The ideal graph generator translates a volatile read to the node
1149   // sequence
1150   //
1151   //   LoadX[mo_acquire]
1152   //   MemBarAcquire
1153   //
1154   // As a special case when using the compressed oops optimization we
1155   // may also see this variant
1156   //
1157   //   LoadN[mo_acquire]
1158   //   DecodeN
1159   //   MemBarAcquire
1160   //
1161   // A volatile write is translated to the node sequence
1162   //
1163   //   MemBarRelease
1164   //   StoreX[mo_release] {CardMark}-optional
1165   //   MemBarVolatile
1166   //


1193   // predicates need to detect its presence in order to correctly
1194   // select the desired adlc rules.
1195   //
1196   // Inlined unsafe volatile gets manifest as a somewhat different
1197   // node sequence to a normal volatile get
1198   //
1199   //   MemBarCPUOrder
1200   //        ||       \\
1201   //   MemBarAcquire LoadX[mo_acquire]
1202   //        ||
1203   //   MemBarCPUOrder
1204   //
1205   // In this case the acquire membar does not directly depend on the
1206   // load. However, we can be sure that the load is generated from an
1207   // inlined unsafe volatile get if we see it dependent on this unique
1208   // sequence of membar nodes. Similarly, given an acquire membar we
1209   // can know that it was added because of an inlined unsafe volatile
1210   // get if it is fed and feeds a cpuorder membar and if its feed
1211   // membar also feeds an acquiring load.
1212   //
1213   // Finally an inlined (Unsafe) CAS operation is translated to the
1214   // following ideal graph
1215   //
1216   //   MemBarRelease
1217   //   MemBarCPUOrder
1218   //   CompareAndSwapX {CardMark}-optional
1219   //   MemBarCPUOrder
1220   //   MemBarAcquire
1221   //
1222   // So, where we can identify these volatile read and write
1223   // signatures we can choose to plant either of the above two code
1224   // sequences. For a volatile read we can simply plant a normal
1225   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1226   // also choose to inhibit translation of the MemBarAcquire and
1227   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1228   //
1229   // When we recognise a volatile store signature we can choose to
1230   // plant a dmb ish as a translation for the MemBarRelease, a
1231   // normal str<x> and then a dmb ish for the MemBarVolatile.
1232   // Alternatively, we can inhibit translation of the MemBarRelease
1233   // and MemBarVolatile and instead plant a simple stlr<x>
1234   // instruction.
1235   //
1236   // When we recognise a CAS signature we can choose to plant a dmb
1237   // ish as a translation for the MemBarRelease, the conventional
1238   // macro-instruction sequence for the CompareAndSwap node (which
1239   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1240   // Alternatively, we can elide generation of the dmb instructions
1241   // and plant the alternative CompareAndSwap macro-instruction
1242   // sequence (which uses ldaxr<x>).
1243   // 
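  // By way of illustration, that selection is expressed in the
  // matching rules in roughly the following shape (this rule is a
  // sketch only, not one of the real declarations, which appear
  // later in this file):
  //
  //   instruct loadI_volatile(iRegINoSp dst, indirect mem)
  //   %{
  //     match(Set dst (LoadI mem));
  //     predicate(needs_acquiring_load(n));
  //     // encoding emits ldarw rather than ldrw + dmb ishld
  //   %}
  //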
1244   // Of course, the above only applies when we see these signature
1245   // configurations. We still want to plant dmb instructions in any
1246   // other cases where we may see a MemBarAcquire, MemBarRelease or
1247   // MemBarVolatile. For example, at the end of a constructor which
1248   // writes final/volatile fields we will see a MemBarRelease
1249   // node and this needs a 'dmb ish' lest we risk the
1250   // constructed object being visible without making the
1251   // final/volatile field writes visible.
1252   //
1253   // n.b. the translation rules below which rely on detection of the
1254   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1255   // If we see anything other than the signature configurations we
1256   // always just translate the loads and stores to ldr<x> and str<x>
1257   // and translate acquire, release and volatile membars to the
1258   // relevant dmb instructions.
1259   //
1260 
1261   // graph traversal helpers used for volatile put/get and CAS
1262   // optimization
1263 
1264   // 1) general purpose helpers
1265 
1266   // if node n is linked to a parent MemBarNode by an intervening
1267   // Control and Memory ProjNode return the MemBarNode otherwise return
1268   // NULL.
1269   //
1270   // n may only be a Load or a MemBar.
1271 
1272   MemBarNode *parent_membar(const Node *n)
1273   {
1274     Node *ctl = NULL;
1275     Node *mem = NULL;
1276     Node *membar = NULL;
1277 
1278     if (n->is_Load()) {
1279       ctl = n->lookup(LoadNode::Control);
1280       mem = n->lookup(LoadNode::Memory);
1281     } else if (n->is_MemBar()) {
1282       ctl = n->lookup(TypeFunc::Control);
1283       mem = n->lookup(TypeFunc::Memory);
1284     } else {
1285         return NULL;
1286     }
1287 
1288     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1289       return NULL;
1290     }
1291 
1292     membar = ctl->lookup(0);
1293 
1294     if (!membar || !membar->is_MemBar()) {
1295       return NULL;
1296     }
1297 
1298     if (mem->lookup(0) != membar) {
1299       return NULL;
1300     }
1301 
1302     return membar->as_MemBar();
1303   }
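
  // n.b. as an example, in the unsafe volatile get subgraph shown
  // earlier, parent_membar applied to the MemBarAcquire returns the
  // MemBarCPUOrder which feeds it; applied to a node with no such
  // membar parent it returns NULL.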
1304 
1305   // if n is linked to a child MemBarNode by intervening Control and
1306   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1307 
1308   MemBarNode *child_membar(const MemBarNode *n)
1309   {
1310     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1311     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1312 
1313     // MemBar needs to have both a Ctl and Mem projection
1314     if (! ctl || ! mem)
1315       return NULL;
1316 
1317     MemBarNode *child = NULL;
1318     Node *x;
1319 
1320     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1321       x = ctl->fast_out(i);
1322       // if we see a membar we keep hold of it. we may also see a new
1323       // arena copy of the original but it will appear later
1324       if (x->is_MemBar()) {
1325           child = x->as_MemBar();
1326           break;
1327       }
1328     }
1329 
1330     if (child == NULL) {
1331       return NULL;
1332     }
1333 
1334     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1335       x = mem->fast_out(i);
1336       // if we see a membar we keep hold of it. we may also see a new
1337       // arena copy of the original but it will appear later
1338       if (x == child) {
1339         return child;
1340       }
1341     }
1342     return NULL;
1343   }
1344 
1345   // helper predicate used to filter candidates for a leading memory
1346   // barrier
1347   //
1348   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1349   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1350 
1351   bool leading_membar(const MemBarNode *barrier)
1352   {
1353     int opcode = barrier->Opcode();
1354     // if this is a release membar we are ok
1355     if (opcode == Op_MemBarRelease) {
1356       return true;
1357     }
1358     // if it's a cpuorder membar . . .
1359     if (opcode != Op_MemBarCPUOrder) {
1360       return false;
1361     }
1362     // then the parent has to be a release membar
1363     MemBarNode *parent = parent_membar(barrier);
1364     if (!parent) {
1365       return false;
1366     }
1367     opcode = parent->Opcode();
1368     return opcode == Op_MemBarRelease;
1369   }
1370  
1371   // 2) card mark detection helper
1372 
1373   // helper predicate which can be used to detect a volatile membar
1374   // introduced as part of a conditional card mark sequence either by
1375   // G1 or by CMS when UseCondCardMark is true.
1376   //
1377   // membar can be definitively determined to be part of a card mark
1378   // sequence if and only if all the following hold
1379   //
1380   // i) it is a MemBarVolatile
1381   //
1382   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1383   // true
1384   //
1385   // iii) the node's Mem projection feeds a StoreCM node.
1386   
1387   bool is_card_mark_membar(const MemBarNode *barrier)
1388   {
1389     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1390       return false;
1391     }
1392 
1393     if (barrier->Opcode() != Op_MemBarVolatile) {
1394       return false;
1395     }
1396 
1397     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1398 
1399     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1400       Node *y = mem->fast_out(i);
1401       if (y->Opcode() == Op_StoreCM) {
1402         return true;
1403       }
1404     }
1405   
1406     return false;
1407   }
1408 
1409 
1410   // 3) helper predicates to traverse volatile put or CAS graphs which
1411   // may contain GC barrier subgraphs
1412 
1413   // Preamble
1414   // --------
1415   //
1416   // for volatile writes we can omit generating barriers and employ a
1417   // releasing store when we see a node sequence with a
1418   // leading MemBarRelease and a trailing MemBarVolatile as follows
1419   //
1420   //   MemBarRelease
1421   //  {      ||      } -- optional
1422   //  {MemBarCPUOrder}
1423   //         ||     \\
1424   //         ||     StoreX[mo_release]
1425   //         | \     /
1426   //         | MergeMem
1427   //         | /
1428   //   MemBarVolatile
1429   //
1430   // where
1431   //  || and \\ represent Ctl and Mem feeds via Proj nodes


1461   // ordering is required for both non-volatile and volatile
1462   // puts. Normally that means we need to translate a StoreCM using
1463   // the sequence
1464   //
1465   //   dmb ishst
1466   //   strb
1467   //
1468   // However, in the case of a volatile put if we can recognise this
1469   // configuration and plant an stlr for the object write then we can
1470   // omit the dmb and just plant an strb since visibility of the stlr
1471   // is ordered before visibility of subsequent stores. StoreCM nodes
1472   // also arise when using G1 or using CMS with conditional card
1473   // marking. In these cases (as we shall see) we don't need to insert
1474   // the dmb when translating StoreCM because there is already an
1475   // intervening StoreLoad barrier between it and the StoreP/N.
1476   //
1477   // It is also possible to perform the card mark conditionally on it
1478   // currently being unmarked in which case the volatile put graph
1479   // will look slightly different
1480   //
1481   //   MemBarRelease____________________________________________

1482   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1483   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1484   //         | \     /                              \            |
1485   //         | MergeMem                            . . .      StoreB
1486   //         | /                                                /
1487   //         ||     /
1488   //   MemBarVolatile
1489   //
1490   // It is worth noting at this stage that both the above
1491   // configurations can be uniquely identified by checking that the
1492   // memory flow includes the following subgraph:
1493   //
1494   //   MemBarRelease
1495   //  {MemBarCPUOrder}
1496   //          |  \      . . .
1497   //          |  StoreX[mo_release]  . . .
1498   //          |   /
1499   //         MergeMem
1500   //          |
1501   //   MemBarVolatile
1502   //
1503   // This is referred to as a *normal* subgraph. It can easily be
1504   // detected starting from any candidate MemBarRelease,
1505   // StoreX[mo_release] or MemBarVolatile.
1506   //
1507   // A simple variation on this normal case occurs for an unsafe CAS
1508   // operation. The basic graph for a non-object CAS is
1509   //
1510   //   MemBarRelease
1511   //         ||
1512   //   MemBarCPUOrder
1513   //         ||     \\   . . .
1514   //         ||     CompareAndSwapX
1515   //         ||       |
1516   //         ||     SCMemProj
1517   //         | \     /
1518   //         | MergeMem
1519   //         | /
1520   //   MemBarCPUOrder
1521   //         ||
1522   //   MemBarAcquire
1523   //
1524   // The same basic variations on this arrangement (mutatis mutandis)
1525   // occur when a card mark is introduced, i.e. we see the same basic
1526   // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527   // tail of the graph is a pair comprising a MemBarCPUOrder +
1528   // MemBarAcquire.
1529   //
1530   // So, in the case of a CAS the normal graph has the variant form
1531   //
1532   //   MemBarRelease
1533   //   MemBarCPUOrder
1534   //          |   \      . . .
1535   //          |  CompareAndSwapX  . . .
1536   //          |    |
1537   //          |   SCMemProj
1538   //          |   /  . . .
1539   //         MergeMem
1540   //          |
1541   //   MemBarCPUOrder
1542   //   MemBarAcquire
1543   //
1544   // This graph can also easily be detected starting from any
1545   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546   //
1547   // the code below uses two helper predicates, leading_to_normal and
1548   // normal_to_leading to identify these normal graphs, one validating
1549   // the layout starting from the top membar and searching down and
1550   // the other validating the layout starting from the lower membar
1551   // and searching up.
1552   //
1553   // There are two special case GC configurations when a normal graph
1554   // may not be generated: when using G1 (which always employs a
1555   // conditional card mark); and when using CMS with conditional card
1556   // marking configured. These GCs are both concurrent rather than
1557   // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1558   // graph between the leading and trailing membar nodes, in
1559   // particular enforcing stronger memory serialisation between the
1560   // object put and the corresponding conditional card mark. CMS
1561   // employs a post-write GC barrier while G1 employs both a pre- and
1562   // post-write GC barrier. Of course the extra nodes may be absent --
1563   // they are only inserted for object puts. This significantly
1564   // complicates the task of identifying whether a MemBarRelease,
1565   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566   // when using these GC configurations (see below). It adds similar
1567   // complexity to the task of identifying whether a MemBarRelease,
1568   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569   //
1570   // In both cases the post-write subtree includes an auxiliary
1571   // MemBarVolatile (StoreLoad barrier) separating the object put and
1572   // the read of the corresponding card. This poses two additional
1573   // problems.
1574   //
1575   // Firstly, a card mark MemBarVolatile needs to be distinguished
1576   // from a normal trailing MemBarVolatile. Resolving this first
1577   // problem is straightforward: a card mark MemBarVolatile always
1578   // projects a Mem feed to a StoreCM node and that is a unique marker
1579   //
1580   //      MemBarVolatile (card mark)
1581   //       C |    \     . . .
1582   //         |   StoreCM   . . .
1583   //       . . .
1584   //
1585   // The second problem is how the code generator should translate
1586   // the card mark barrier. It always needs to be translated to a "dmb
1587   // ish" instruction whether or not it occurs as part of a volatile
1588   // put. A StoreLoad barrier is needed after the object put to ensure
1589   // i) visibility to GC threads of the object put and ii) visibility
1590   // to the mutator thread of any card clearing write by a GC
1591   // thread. Clearly a normal store (str) will not guarantee this
1592   // ordering but neither will a releasing store (stlr). The latter
1593   // guarantees that the object put is visible but does not guarantee
1594   // that writes by other threads have also been observed.
1595   // 
1596   // So, returning to the task of translating the object put and the
1597   // leading/trailing membar nodes: what do the non-normal node graphs
1598   // look like for these 2 special cases? and how can we determine the
1599   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600   // in both normal and non-normal cases?
1601   //
1602   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1603   // which selects conditional execution based on the value loaded
1604   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605   // intervening StoreLoad barrier (MemBarVolatile).
1606   //
1607   // So, with CMS we may see a node graph for a volatile object store
1608   // which looks like this
1609   //
1610   //   MemBarRelease
1611   //   MemBarCPUOrder_(leading)__________________
1612   //     C |    M \       \\                   C \
1613   //       |       \    StoreN/P[mo_release]  CastP2X
1614   //       |    Bot \    /
1615   //       |       MergeMem
1616   //       |         /
1617   //      MemBarVolatile (card mark)
1618   //     C |  ||    M |
1619   //       | LoadB    |
1620   //       |   |      |
1621   //       | Cmp      |\
1622   //       | /        | \
1623   //       If         |  \
1624   //       | \        |   \
1625   // IfFalse  IfTrue  |    \
1626   //       \     / \  |     \
1627   //        \   / StoreCM    |
1628   //         \ /      |      |
1629   //        Region   . . .   |
1630   //          | \           /
1631   //          |  . . .  \  / Bot
1632   //          |       MergeMem
1633   //          |          |
1634   //        MemBarVolatile (trailing)
1635   //
1636   // The first MergeMem merges the AliasIdxBot Mem slice from the
1637   // leading membar and the oopptr Mem slice from the Store into the
1638   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639   // Mem slice from the card mark membar and the AliasIdxRaw slice
1640   // from the StoreCM into the trailing membar (n.b. the latter
1641   // proceeds via a Phi associated with the If region).
1642   //
1643   // The graph for a CAS varies slightly, the obvious difference being
1644   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646   // MemBarAcquire pair. The other important difference is that the
1647   // CompareAndSwap node's SCMemProj is not merged into the card mark
1648   // membar - it still feeds the trailing MergeMem. This also means
1649   // that the card mark membar receives its Mem feed directly from the
1650   // leading membar rather than via a MergeMem.
1651   //
1652   //   MemBarRelease
1653   //   MemBarCPUOrder__(leading)_________________________
1654   //       ||                       \\                 C \
1655   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1656   //     C |  ||    M |              |
1657   //       | LoadB    |       ______/|
1658   //       |   |      |      /       |
1659   //       | Cmp      |     /      SCMemProj
1660   //       | /        |    /         |
1661   //       If         |   /         /
1662   //       | \        |  /         /
1663   // IfFalse  IfTrue  | /         /
1664   //       \     / \  |/ prec    /
1665   //        \   / StoreCM       /
1666   //         \ /      |        /
1667   //        Region   . . .    /
1668   //          | \            /
1669   //          |  . . .  \   / Bot
1670   //          |       MergeMem
1671   //          |          |
1672   //        MemBarCPUOrder
1673   //        MemBarAcquire (trailing)
1674   //
1675   // This has a slightly different memory subgraph to the one seen
1676   // previously but the core of it is the same as for the CAS normal
1677   // subgraph
1678   //
1679   //   MemBarRelease
1680   //   MemBarCPUOrder____
1681   //      ||             \      . . .
1682   //   MemBarVolatile  CompareAndSwapX  . . .
1683   //      |  \            |
1684   //        . . .   SCMemProj
1685   //          |     /  . . .
1686   //         MergeMem
1687   //          |
1688   //   MemBarCPUOrder
1689   //   MemBarAcquire
1690   //
1691   //
1692   // G1 is quite a lot more complicated. The nodes inserted on behalf
1693   // of G1 may comprise: a pre-write graph which adds the old value to
1694   // the SATB queue; the releasing store itself; and, finally, a
1695   // post-write graph which performs a card mark.
1696   //
1697   // The pre-write graph may be omitted, but only when the put is
1698   // writing to a newly allocated (young gen) object and then only if
1699   // there is a direct memory chain to the Initialize node for the
1700   // object allocation. This will not happen for a volatile put since
1701   // any memory chain passes through the leading membar.
1702   //
1703   // The pre-write graph includes a series of 3 If tests. The outermost
1704   // If tests whether SATB is enabled (no else case). The next If tests
1705   // whether the old value is non-NULL (no else case). The third tests
1706   // whether the SATB queue index is > 0, if so updating the queue. The
1707   // else case for this third If calls out to the runtime to allocate a
1708   // new queue buffer.
1709   //
1710   // So with G1 the pre-write and releasing store subgraph looks like
1711   // this (the nested Ifs are omitted).


1723   //       |                 \              |
1724   //       |    . . .         \             |
1725   //       | /       | /       |            |
1726   //      Region  Phi[M]       |            |
1727   //       | \       |         |            |
1728   //       |  \_____ | ___     |            |
1729   //     C | C \     |   C \ M |            |
1730   //       | CastP2X | StoreN/P[mo_release] |
1731   //       |         |         |            |
1732   //     C |       M |       M |          M |
1733   //        \        |         |           /
1734   //                  . . . 
1735   //          (post write subtree elided)
1736   //                    . . .
1737   //             C \         M /
1738   //         MemBarVolatile (trailing)
1739   //
1740   // n.b. the LoadB in this subgraph is not the card read -- it's a
1741   // read of the SATB queue active flag.
1742   //
1743   // Once again the CAS graph is a minor variant on the above with the
1744   // expected substitutions of CompareAndSwapX for StoreN/P and
1745   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746   //
1747   // The G1 post-write subtree is also optional, this time when the
1748   // new value being written is either null or can be identified as a
1749   // newly allocated (young gen) object with no intervening control
1750   // flow. The latter cannot happen but the former may, in which case
1751   // the card mark membar is omitted and the memory feeds from the
1752   // leading membar and the StoreN/P are merged directly into the
1753   // trailing membar as per the normal subgraph. So, the only special
1754   // case which arises is when the post-write subgraph is generated.
1755   //
1756   // The kernel of the post-write G1 subgraph is the card mark itself
1757   // which includes a card mark memory barrier (MemBarVolatile), a
1758   // card test (LoadB), and a conditional update (If feeding a
1759   // StoreCM). These nodes are surrounded by a series of nested Ifs
1760   // which try to avoid doing the card mark. The top level If skips if
1761   // the object reference does not cross regions (i.e. it tests if
1762   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763   // need not be recorded. The next If, which skips on a NULL value,
1764   // may be absent (it is not generated if the type of value is >=
1765   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766   // checking if card_val != young).  n.b. although this test requires
1767   // a pre-read of the card it can safely be done before the StoreLoad
1768   // barrier. However that does not bypass the need to reread the card
1769   // after the barrier.
1770   //
1771   //                (pre-write subtree elided)
1772   //        . . .                  . . .    . . .  . . .


1820   //    \            MergeMem 
1821   //     \            /
1822   //     MemBarVolatile
1823   //
1824   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825   // from the leading membar and the oopptr Mem slice from the Store
1826   // into the card mark membar i.e. the memory flow to the card mark
1827   // membar still looks like a normal graph.
1828   //
1829   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830   // Mem slices (from the StoreCM and other card mark queue stores).
1831   // However in this case the AliasIdxBot Mem slice does not come
1832   // direct from the card mark membar. It is merged through a series
1833   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834   // from the leading membar with the Mem feed from the card mark
1835   // membar. Each Phi corresponds to one of the Ifs which may skip
1836   // around the card mark membar. So when the If implementing the NULL
1837   // value check has been elided the total number of Phis is 2
1838   // otherwise it is 3.
1839   //
1840   // The CAS graph when using G1GC also includes a pre-write subgraph
1841   // and an optional post-write subgraph. The same variations are
1842   // introduced as for CMS with conditional card marking i.e. the
1843   // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845   // Mem feed from the CompareAndSwapP/N includes a precedence
1846   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847   // trailing membar. So, as before the configuration includes the
1848   // normal CAS graph as a subgraph of the memory flow.
1849   //
1850   // So, the upshot is that in all cases the volatile put graph will
1851   // include a *normal* memory subgraph between the leading membar and
1852   // its child membar, either a volatile put graph (including a
1853   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854   // When that child is not a card mark membar then it marks the end
1855   // of the volatile put or CAS subgraph. If the child is a card mark
1856   // membar then the normal subgraph will form part of a volatile put
1857   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858   // to a trailing barrier via a MergeMem. That feed is either direct
1859   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860   // memory flow (for G1).
1861   // 
1862   // The predicates controlling generation of instructions for store
1863   // and barrier nodes employ a few simple helper functions (described
1864   // below) which identify the presence or absence of all these
1865   // subgraph configurations and provide a means of traversing from
1866   // one node in the subgraph to another.
1867 
1868   // is_CAS(int opcode)
1869   //
1870   // return true if opcode is one of the possible CompareAndSwapX
1871   // values otherwise false.
1872 
1873   bool is_CAS(int opcode)
1874   {
1875     return (opcode == Op_CompareAndSwapI ||
1876             opcode == Op_CompareAndSwapL ||
1877             opcode == Op_CompareAndSwapN ||
1878             opcode == Op_CompareAndSwapP);
1879   }
1880 
1881   // leading_to_normal
1882   //
1883   // graph traversal helper which detects the normal case Mem feed from
1884   // a release membar (or, optionally, its cpuorder child) to a
1885   // dependent volatile membar i.e. it ensures that one or other of
1886   // the following Mem flow subgraphs is present.
1887   //
1888   //   MemBarRelease
1889   //   MemBarCPUOrder {leading}
1890   //          |  \      . . .
1891   //          |  StoreN/P[mo_release]  . . .
1892   //          |   /
1893   //         MergeMem
1894   //          |
1895   //   MemBarVolatile {trailing or card mark}
1896   //
1897   //   MemBarRelease
1898   //   MemBarCPUOrder {leading}
1899   //      |       \      . . .
1900   //      |     CompareAndSwapX  . . .
1901   //               |
1902   //     . . .    SCMemProj
1903   //           \   |
1904   //      |    MergeMem
1905   //      |       /
1906   //    MemBarCPUOrder
1907   //    MemBarAcquire {trailing}
1908   //
1909   // if the correct configuration is present returns the trailing
1910   // membar otherwise NULL.
1911   //
1912   // the input membar is expected to be either a cpuorder membar or a
1913   // release membar. in the latter case it should not have a cpuorder
1914   // membar child.
1915   //
1916   // the returned value may be a card mark or trailing membar
1917   //
1918 
1919   MemBarNode *leading_to_normal(MemBarNode *leading)
1920   {
1921     assert((leading->Opcode() == Op_MemBarRelease ||
1922             leading->Opcode() == Op_MemBarCPUOrder),
1923            "expecting a release or cpuorder membar!");
1924 
1925     // check the mem flow
1926     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1927 
1928     if (!mem) {
1929       return NULL;
1930     }
1931 
1932     Node *x = NULL;
1933     StoreNode * st = NULL;
1934     LoadStoreNode *cas = NULL;
1935     MergeMemNode *mm = NULL;
1936 
1937     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1938       x = mem->fast_out(i);
1939       if (x->is_MergeMem()) {
1940         // two merge mems is one too many
1941         if (mm != NULL) {
1942           return NULL;
1943         }
1944         mm = x->as_MergeMem();
1945       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1946         // two releasing stores/CAS nodes is one too many
1947         if (st != NULL || cas != NULL) {
1948           return NULL;
1949         }
1950         st = x->as_Store();
1951       } else if (is_CAS(x->Opcode())) {
1952         if (st != NULL || cas != NULL) {
1953           return NULL;
1954         }
1955         cas = x->as_LoadStore();
1956       }
1957     }
1958 
1959     // must have a store or a cas
1960     if (!st && !cas) {
1961       return NULL;
1962     }
1963 
1964     // must have a merge if we also have st
1965     if (st && !mm) {
1966       return NULL;
1967     }
1968 
1969     Node *y = NULL;
1970     if (cas) {
1971       // look for an SCMemProj
1972       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
1973         x = cas->fast_out(i);
1974         if (x->is_Proj()) {
1975           y = x;
1976           break;
1977         }
1978       }
1979       if (y == NULL) {
1980         return NULL;
1981       }
1982       // the proj must feed a MergeMem
1983       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
1984         x = y->fast_out(i);
1985         if (x->is_MergeMem()) {
1986           mm = x->as_MergeMem();
1987           break;
1988         }
1989       }
1990       if (mm == NULL)
1991         return NULL;
1992     } else {
1993       // ensure the store feeds the existing mergemem;
1994       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1995         if (st->fast_out(i) == mm) {
1996           y = st;
1997           break;
1998         }
1999       }
2000       if (y == NULL) {

2001         return NULL;
2002       }
2003     }
2004 
2005     MemBarNode *mbar = NULL;
2006     // ensure the merge feeds the expected type of membar
2007     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2008       x = mm->fast_out(i);
2009       if (x->is_MemBar()) {
2010         int opcode = x->Opcode();
2011         if (opcode == Op_MemBarVolatile && st) {
2012           mbar = x->as_MemBar();
2013         } else if (cas && opcode == Op_MemBarCPUOrder) {
2014           MemBarNode *y =  x->as_MemBar();
2015           y = child_membar(y);
2016           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2017             mbar = y;
2018           }
2019         }
2020         break;
2021       }
2022     }
2023 
2024     return mbar;
2025   }
2026 
2027   // normal_to_leading
2028   //
2029   // graph traversal helper which detects the normal case Mem feed
2030   // from either a card mark or a trailing membar to a preceding
2031   // release membar (optionally its cpuorder child) i.e. it ensures
2032   // that one or other of the following Mem flow subgraphs is present.
2033   //
2034   //   MemBarRelease
2035   //   MemBarCPUOrder {leading}
2036   //          |  \      . . .
2037   //          |  StoreN/P[mo_release]  . . .
2038   //          |   /
2039   //         MergeMem
2040   //          |
2041   //   MemBarVolatile {card mark or trailing}
2042   //
2043   //   MemBarRelease
2044   //   MemBarCPUOrder {leading}
2045   //      |       \      . . .
2046   //      |     CompareAndSwapX  . . .
2047   //               |
2048   //     . . .    SCMemProj
2049   //           \   |
2050   //      |    MergeMem
2051   //      |        /
2052   //    MemBarCPUOrder
2053   //    MemBarAcquire {trailing}
2054   //
2055   // this predicate checks for the same flow as the previous predicate
2056   // but starting from the bottom rather than the top.
2057   //
2058   // if the configuration is present returns the cpuorder membar for
2059   // preference or when absent the release membar otherwise NULL.
2060   //
2061   // n.b. the input membar is expected to be a MemBarVolatile but
2062   // need not be a card mark membar.
2063 
2064   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065   {
2066     // input must be a volatile or an acquire membar
2067     assert((barrier->Opcode() == Op_MemBarVolatile ||
2068             barrier->Opcode() == Op_MemBarAcquire),
2069            "expecting a volatile or an acquire membar");
2070     Node *x;
2071     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072 
2073     // if we have an acquire membar then it must be fed via a CPUOrder
2074     // membar
2075 
2076     if (is_cas) {
2077       // skip to parent barrier which must be a cpuorder
2078       x = parent_membar(barrier);
2079       if (!x || x->Opcode() != Op_MemBarCPUOrder)
2080         return NULL;
2081     } else {
2082       // start from the supplied barrier
2083       x = (Node *)barrier;
2084     }
2085 
2086     // the Mem feed to the membar should be a merge
2087     x = x->in(TypeFunc::Memory);
2088     if (!x->is_MergeMem())
2089       return NULL;
2090 
2091     MergeMemNode *mm = x->as_MergeMem();
2092 
2093     if (is_cas) {
2094       // the merge should be fed from the CAS via an SCMemProj node
2095       x = NULL;
2096       for (uint idx = 1; idx < mm->req(); idx++) {
2097         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2098           x = mm->in(idx);
2099           break;
2100         }
2101       }
2102       if (x == NULL) {
2103         return NULL;
2104       }
2105       // check for a CAS feeding this proj
2106       x = x->in(0);
2107       int opcode = x->Opcode();
2108       if (!is_CAS(opcode)) {
2109         return NULL;
2110       }
2111       // the CAS should get its mem feed from the leading membar
2112       x = x->in(MemNode::Memory);
2113     } else {
2114       // the merge should get its Bottom mem feed from the leading membar
2115       x = mm->in(Compile::AliasIdxBot);      
2116     } 
2117 
2118     // ensure this is a non control projection
2119     if (!x->is_Proj() || x->is_CFG()) {
2120       return NULL;
2121     }
2122     // if it is fed by a membar that's the one we want
2123     x = x->in(0);
2124 
2125     if (!x->is_MemBar()) {
2126       return NULL;
2127     }
2128 
2129     MemBarNode *leading = x->as_MemBar();
2130     // reject invalid candidates
2131     if (!leading_membar(leading)) {
2132       return NULL;
2133     }
2134 
2135     // ok, we have a leading membar, now for the sanity clauses
2136 
2137     // the leading membar must feed Mem to a releasing store or CAS
2138     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2139     StoreNode *st = NULL;
2140     LoadStoreNode *cas = NULL;
2141     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2142       x = mem->fast_out(i);
2143       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2144         // two stores or CASes is one too many
2145         if (st != NULL || cas != NULL) {
2146           return NULL;
2147         }
2148         st = x->as_Store();
2149       } else if (is_CAS(x->Opcode())) {
2150         if (st != NULL || cas != NULL) {
2151           return NULL;
2152         }
2153         cas = x->as_LoadStore();
2154       }
2155     }
2156 
2157     // we must have found either a store or a cas
2158     if (st == NULL && cas == NULL) {
2159       return NULL;
2160     }
2161 
2162     if (st == NULL) {
2163       // nothing more to check
2164       return leading;
2165     } else {
2166       // we should not have a store if we started from an acquire
2167       if (is_cas) {
2168         return NULL;
2169       }
2170 
2171       // the store should feed the merge we used to get here
2172       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173         if (st->fast_out(i) == mm) {
2174           return leading;
2175         }
2176       }
2177     }
2178 
2179     return NULL;
2180   }
2181 
2182   // card_mark_to_trailing
2183   //
2184   // graph traversal helper which detects extra, non-normal Mem feed
2185   // from a card mark volatile membar to a trailing membar i.e. it
2186   // ensures that one of the following three GC post-write Mem flow
2187   // subgraphs is present.
2188   //
2189   // 1)
2190   //     . . .
2191   //       |
2192   //   MemBarVolatile (card mark)
2193   //      |          |     
2194   //      |        StoreCM
2195   //      |          |
2196   //      |        . . .
2197   //  Bot |  / 
2198   //   MergeMem 
2199   //      |
2200   //      |
2201   //    MemBarVolatile {trailing}
2202   //
2203   // 2)
2204   //   MemBarRelease/CPUOrder (leading)
2205   //    |
2206   //    | 
2207   //    |\       . . .
2208   //    | \        | 
2209   //    |  \  MemBarVolatile (card mark) 
2210   //    |   \   |     |
2211   //     \   \  |   StoreCM    . . .
2212   //      \   \ |
2213   //       \  Phi
2214   //        \ /
2215   //        Phi  . . .
2216   //     Bot |   /
2217   //       MergeMem
2218   //         |
2219   //    MemBarVolatile {trailing}
2220   //
2221   //
2222   // 3)
2223   //   MemBarRelease/CPUOrder (leading)
2224   //    |
2225   //    |\
2226   //    | \
2227   //    |  \      . . .
2228   //    |   \       |
2229   //    |\   \  MemBarVolatile (card mark)
2230   //    | \   \   |     |
2231   //    |  \   \  |   StoreCM    . . .
2232   //    |   \   \ |
2233   //     \   \  Phi
2234   //      \   \ /  
2235   //       \  Phi
2236   //        \ /
2237   //        Phi  . . .
2238   //     Bot |   /
2239   //       MergeMem
2240   //         |
2241   //         |
2242   //    MemBarVolatile {trailing}
2243   //
2244   // configuration 1 is only valid if UseConcMarkSweepGC &&
2245   // UseCondCardMark
2246   //
2247   // configurations 2 and 3 are only valid if UseG1GC.
2248   //
2249   // if a valid configuration is present returns the trailing membar
2250   // otherwise NULL.
2251   //
2252   // n.b. the supplied membar is expected to be a card mark
2253   // MemBarVolatile i.e. the caller must ensure the input node has the
2254   // correct operand and feeds Mem to a StoreCM node
2255 
2256   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257   {
2258     // input must be a card mark volatile membar
2259     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260 
2261     Node *feed = barrier->proj_out(TypeFunc::Memory);
2262     Node *x;


2272         x = feed->fast_out(i);
2273         // check for a direct MergeMem feed
2274         if (x->is_MergeMem()) {
2275           mm = x->as_MergeMem();
2276           break;
2277         }
2278       }
2279       if (mm) {
2280         retry_feed = false;
2281       } else if (UseG1GC && phicount++ < MAX_PHIS) {
2282         // the barrier may feed indirectly via one or two Phi nodes
2283         PhiNode *phi = NULL;
2284         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285           x = feed->fast_out(i);
2286           // the correct Phi will be merging a Bot memory slice
2287           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288             phi = x->as_Phi();
2289             break;
2290           }
2291         }
2292         if (!phi) {
2293           return NULL;
2294         }
2295         // look for another merge below this phi
2296         feed = phi;
2297       } else {
2298         // couldn't find a merge
2299         return NULL;
2300       }
2301     }
2302 
2303     // sanity check this feed turns up as the expected slice
2304     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305 
2306     MemBarNode *trailing = NULL;
2307     // be sure the merge feeds a trailing membar
2308     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309       x = mm->fast_out(i);
2310       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311         trailing = x->as_MemBar();
2312         break;
2313       }
2314     }
2315 
2316     return trailing;
2317   }
2318 
2319   // trailing_to_card_mark
2320   //
2321   // graph traversal helper which detects extra, non-normal Mem feed
2322   // from a trailing volatile membar to a preceding card mark volatile
2323   // membar i.e. it identifies whether one of the three possible extra
2324   // GC post-write Mem flow subgraphs is present
2325   //
2326   // this predicate checks for the same flow as the previous predicate
2327   // but starting from the bottom rather than the top.
2328   //
2329   // if the configuration is present returns the card mark membar
2330   // otherwise NULL
2331   //
2332   // n.b. the supplied membar is expected to be a trailing
2333   // MemBarVolatile i.e. the caller must ensure the input node has the
2334   // correct opcode
2335 
2336   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2337   {
2338     assert(trailing->Opcode() == Op_MemBarVolatile,
2339            "expecting a volatile membar");
2340     assert(!is_card_mark_membar(trailing),
2341            "not expecting a card mark membar");
2342 

2343     // the Mem feed to the membar should be a merge
2344     Node *x = trailing->in(TypeFunc::Memory);
2345     if (!x->is_MergeMem()) {
2346       return NULL;
2347     }
2348 
2349     MergeMemNode *mm = x->as_MergeMem();
2350 
2351     x = mm->in(Compile::AliasIdxBot);
2352     // with G1 we may possibly see a Phi or two before we see a Memory
2353     // Proj from the card mark membar
2354 
2355     const int MAX_PHIS = 3;     // max phis we will search through
2356     int phicount = 0;           // current search count
2357 
2358     bool retry_feed = !x->is_Proj();
2359 
2360     while (retry_feed) {
2361       if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2362         PhiNode *phi = x->as_Phi();
2363         ProjNode *proj = NULL;
2364         PhiNode *nextphi = NULL;
2365         bool found_leading = false;
2366         for (uint i = 1; i < phi->req(); i++) {
2367           x = phi->in(i);


2380         }
2381         // if we found a correct looking proj then retry from there
2382         // otherwise we must see a leading membar and a phi or this
2383         // is the wrong config
2384         if (proj != NULL) {
2385           x = proj;
2386           retry_feed = false;
2387         } else if (found_leading && nextphi != NULL) {
2388           // retry from this phi to check phi2
2389           x = nextphi;
2390         } else {
2391           // not what we were looking for
2392           return NULL;
2393         }
2394       } else {
2395         return NULL;
2396       }
2397     }
2398     // the proj has to come from the card mark membar
2399     x = x->in(0);
2400     if (!x->is_MemBar()) {
2401       return NULL;
2402     }
2403 
2404     MemBarNode *card_mark_membar = x->as_MemBar();
2405 
2406     if (!is_card_mark_membar(card_mark_membar)) {
2407       return NULL;
2408     }
2409 
2410     return card_mark_membar;
2411   }
2412 
2413   // trailing_to_leading
2414   //
2415   // graph traversal helper which checks the Mem flow up the graph
2416   // from a (non-card mark) trailing membar attempting to locate and
2417   // return an associated leading membar. it first looks for a
2418   // subgraph in the normal configuration (relying on helper
2419   // normal_to_leading). failing that it then looks for one of the
2420   // possible post-write card mark subgraphs linking the trailing node
2421   // to the card mark membar (relying on helper
2422   // trailing_to_card_mark), and then checks that the card mark membar
2423   // is fed by a leading membar (once again relying on auxiliary
2424   // predicate normal_to_leading).
2425   //
2426   // if the configuration is valid returns the cpuorder membar for
2427   // preference or when absent the release membar otherwise NULL.
2428   //
2429   // n.b. the input membar is expected to be either a volatile or
2430   // acquire membar but in the former case must *not* be a card mark
2431   // membar.
2432 
2433   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434   {
2435     assert((trailing->Opcode() == Op_MemBarAcquire ||
2436             trailing->Opcode() == Op_MemBarVolatile),
2437            "expecting an acquire or volatile membar");
2438     assert((trailing->Opcode() != Op_MemBarVolatile ||
2439             !is_card_mark_membar(trailing)),
2440            "not expecting a card mark membar");
2441 
2442     MemBarNode *leading = normal_to_leading(trailing);
2443 
2444     if (leading) {
2445       return leading;
2446     }
2447 
2448     // nothing more to do if this is an acquire
2449     if (trailing->Opcode() == Op_MemBarAcquire) {
2450       return NULL;
2451     }
2452 
2453     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454 
2455     if (!card_mark_membar) {
2456       return NULL;
2457     }
2458 
2459     return normal_to_leading(card_mark_membar);
2460   }
2461 
2462   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463 
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466   assert(barrier->is_MemBar(), "expecting a membar");
2467 
2468   if (UseBarriersForVolatile) {
2469     // we need to plant a dmb
2470     return false;
2471   }
2472 
2473   // a volatile read derived from bytecode (or also from an inlined
2474   // SHA field read via LibraryCallKit::load_field_from_object)
2475   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2476   // with a bogus read dependency on its preceding load. so in those
2477   // cases we will find the load node at the PARMS offset of the
2478   // acquire membar.  n.b. there may be an intervening DecodeN node.
2479   //
2480   // a volatile load derived from an inlined unsafe field access
2481   // manifests as a cpuorder membar with Ctl and Mem projections
2482   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483   // acquire then feeds another cpuorder membar via Ctl and Mem
2484   // projections. The load has no output dependency on these trailing
2485   // membars because subsequent nodes inserted into the graph take
2486   // their control feed from the final membar cpuorder meaning they
2487   // are all ordered after the load.
2488 
2489   Node *x = barrier->lookup(TypeFunc::Parms);
2490   if (x) {
2491     // we are starting from an acquire and it has a fake dependency
2492     //
2493     // need to check for
2494     //
2495     //   LoadX[mo_acquire]
2496     //   {  |1   }
2497     //   {DecodeN}
2498     //      |Parms
2499     //   MemBarAcquire*
2500     //
2501     // where * tags node we were passed
2502     // and |k means input k
2503     if (x->is_DecodeNarrowPtr()) {
2504       x = x->in(1);
2505     }
2506 
2507     return (x->is_Load() && x->as_Load()->is_acquire());
2508   }
2509   
2510   // now check for an unsafe volatile get
2511 
2512   // need to check for
2513   //
2514   //   MemBarCPUOrder
2515   //        ||       \\
2516   //   MemBarAcquire* LoadX[mo_acquire]
2517   //        ||
2518   //   MemBarCPUOrder
2519   //
2520   // where * tags node we were passed
2521   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2522 
2523   // check for a parent MemBarCPUOrder
2524   ProjNode *ctl;
2525   ProjNode *mem;
2526   MemBarNode *parent = parent_membar(barrier);
2527   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2528     return false;
2529   ctl = parent->proj_out(TypeFunc::Control);
2530   mem = parent->proj_out(TypeFunc::Memory);
2531   if (!ctl || !mem) {
2532     return false;
2533   }
2534   // ensure the proj nodes both feed a LoadX[mo_acquire]
2535   LoadNode *ld = NULL;
2536   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2537     x = ctl->fast_out(i);
2538     // if we see a load we keep hold of it and stop searching
2539     if (x->is_Load()) {
2540       ld = x->as_Load();
2541       break;
2542     }
2543   }
2544   // it must be an acquiring load
2545   if (ld && ld->is_acquire()) {
2546 
2547     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2548       x = mem->fast_out(i);
2549       // if we see the same load we drop it and stop searching
2550       if (x == ld) {
2551         ld = NULL;
2552         break;
2553       }
2554     }
2555     // we must have dropped the load
2556     if (ld == NULL) {

2557       // check for a child cpuorder membar
2558       MemBarNode *child  = child_membar(barrier->as_MemBar());
2559       if (child && child->Opcode() == Op_MemBarCPUOrder)


2560         return true;
2561     }
2562   }
2563 
2564   // final option for unnecessary membar is that it is a trailing node
2565   // belonging to a CAS
2566 
2567   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2568 
2569   return leading != NULL;
2570 }
2571 
2572 bool needs_acquiring_load(const Node *n)
2573 {
2574   assert(n->is_Load(), "expecting a load");
2575   if (UseBarriersForVolatile) {
2576     // we use a normal load and a dmb
2577     return false;
2578   }
2579 
2580   LoadNode *ld = n->as_Load();
2581 
2582   if (!ld->is_acquire()) {
2583     return false;
2584   }
2585 
2586   // check if this load is feeding an acquire membar
2587   //
2588   //   LoadX[mo_acquire]
2589   //   {  |1   }
2590   //   {DecodeN}
2591   //      |Parms
2592   //   MemBarAcquire*
2593   //
2594   // where * tags node we were passed
2595   // and |k means input k
2596 
2597   Node *start = ld;
2598   Node *mbacq = NULL;
2599 
2600   // if we hit a DecodeNarrowPtr we reset the start node and restart
2601   // the search through the outputs
2602  restart:
2603 
2604   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {


2614   }
2615 
2616   if (mbacq) {
2617     return true;
2618   }
2619 
2620   // now check for an unsafe volatile get
2621 
2622   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2623   //
2624   //     MemBarCPUOrder
2625   //        ||       \\
2626   //   MemBarAcquire* LoadX[mo_acquire]
2627   //        ||
2628   //   MemBarCPUOrder
2629 
2630   MemBarNode *membar;
2631 
2632   membar = parent_membar(ld);
2633 
2634   if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2635     return false;
2636   }
2637 
2638   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2639 
2640   membar = child_membar(membar);
2641 
2642   if (!membar || membar->Opcode() != Op_MemBarAcquire) {
2643     return false;
2644   }
2645 
2646   membar = child_membar(membar);
2647   
2648   if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2649     return false;
2650   }
2651 
2652   return true;
2653 }
2654 
2655 bool unnecessary_release(const Node *n)
2656 {
2657   assert((n->is_MemBar() &&
2658           n->Opcode() == Op_MemBarRelease),
2659          "expecting a release membar");
2660 
2661   if (UseBarriersForVolatile) {
2662     // we need to plant a dmb
2663     return false;
2664   }
2665 
2666   // if there is a dependent CPUOrder barrier then use that as the
2667   // leading
2668 
2669   MemBarNode *barrier = n->as_MemBar();
2670   // check for an intervening cpuorder membar
2671   MemBarNode *b = child_membar(barrier);
2672   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673     // ok, so start the check from the dependent cpuorder barrier
2674     barrier = b;
2675   }
2676 
2677   // must start with a normal feed
2678   MemBarNode *child_barrier = leading_to_normal(barrier);
2679 
2680   if (!child_barrier) {
2681     return false;
2682   }
2683 
2684   if (!is_card_mark_membar(child_barrier)) {
2685     // this is the trailing membar and we are done
2686     return true;
2687   }
2688 
2689   // must be sure this card mark feeds a trailing membar
2690   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691   return (trailing != NULL);
2692 }
2693 
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696   // assert n->is_MemBar();
2697   if (UseBarriersForVolatile) {
2698     // we need to plant a dmb
2699     return false;
2700   }
2701 
2702   MemBarNode *mbvol = n->as_MemBar();
2703 
2704   // first we check if this is part of a card mark. if so then we have
2705   // to generate a StoreLoad barrier
2706   
2707   if (is_card_mark_membar(mbvol)) {
2708       return false;
2709   }
2710 
2711   // ok, if it's not a card mark then we still need to check if it is
2712   // a trailing membar of a volatile put graph.
2713 
2714   return (trailing_to_leading(mbvol) != NULL);
2715 }
2716 
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718 
2719 bool needs_releasing_store(const Node *n)
2720 {
2721   // assert n->is_Store();
2722   if (UseBarriersForVolatile) {
2723     // we use a normal store and dmb combination
2724     return false;
2725   }
2726 
2727   StoreNode *st = n->as_Store();
2728 
2729   // the store must be marked as releasing
2730   if (!st->is_release()) {
2731     return false;
2732   }
2733 
2734   // the store must be fed by a membar
2735 
2736   Node *x = st->lookup(StoreNode::Memory);
2737 
2738   if (! x || !x->is_Proj()) {
2739     return false;
2740   }
2741 
2742   ProjNode *proj = x->as_Proj();
2743 
2744   x = proj->lookup(0);
2745 
2746   if (!x || !x->is_MemBar()) {
2747     return false;
2748   }
2749 
2750   MemBarNode *barrier = x->as_MemBar();
2751 
2752   // if the barrier is a release membar or a cpuorder membar fed by a
2753   // release membar then we need to check whether that forms part of a
2754   // volatile put graph.
2755 
2756   // reject invalid candidates
2757   if (!leading_membar(barrier)) {
2758     return false;
2759   }
2760 
2761   // does this lead a normal subgraph?
2762   MemBarNode *mbvol = leading_to_normal(barrier);
2763 
2764   if (!mbvol) {
2765     return false;
2766   }
2767 
2768   // all done unless this is a card mark
2769   if (!is_card_mark_membar(mbvol)) {
2770     return true;
2771   }
2772   
2773   // we found a card mark -- just make sure we have a trailing barrier
2774 
2775   return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777 
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781 
2782 bool needs_acquiring_load_exclusive(const Node *n)
2783 {
2784   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2785   if (UseBarriersForVolatile) {
2786     return false;
2787   }
2788 
2789   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2790 #ifdef ASSERT
2791   LoadStoreNode *st = n->as_LoadStore();
2792 
2793   // the store must be fed by a membar
2794 
2795   Node *x = st->lookup(StoreNode::Memory);
2796 
2797   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2798 
2799   ProjNode *proj = x->as_Proj();
2800 
2801   x = proj->lookup(0);
2802 
2803   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2804 
2805   MemBarNode *barrier = x->as_MemBar();
2806 
2807   // the barrier must be a cpuorder membar fed by a release membar
2808 
2809   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2810          "CAS not fed by cpuorder membar!");
2811       
2812   MemBarNode *b = parent_membar(barrier);
2813   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2814           "CAS not fed by cpuorder+release membar pair!");
2815 
2816   // does this lead a normal subgraph?
2817   MemBarNode *mbar = leading_to_normal(barrier);
2818 
2819   assert(mbar != NULL, "CAS not embedded in normal graph!");
2820 
2821   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2822 #endif // ASSERT
2823   // so we can just return true here
2824   return true;
2825 }
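
// for illustration, the graph shape asserted above is the one
// obtained by inlining an unsafe CAS, e.g. the one underlying
//
//   AtomicInteger a = new AtomicInteger();
//   boolean ok = a.compareAndSet(expect, update);
//
// i.e. a MemBarRelease + MemBarCPUOrder pair feeding the
// CompareAndSwapX node with a trailing MemBarAcquire. returning
// true here selects one of the _acq rules below, so the CAS is
// implemented with an acquiring exclusive load and the leading and
// trailing dmbs are elided.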
2826 
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if the StoreStore barrier that would normally precede
2830 // the card write can be elided, otherwise false
2831 
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834   assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2835 
2836   // we only ever need to generate a dmb ishst between an object put
2837   // and the associated card mark when we are using CMS without
2838   // conditional card marking
2839 
2840   if (!UseConcMarkSweepGC || UseCondCardMark) {
2841     return true;
2842   }
2843 
2844   // if we are implementing volatile puts using barriers then the
2845   // object put is a plain str so we must insert the dmb ishst
2846 
2847   if (UseBarriersForVolatile) {
2848     return false;
2849   }
2850 
2851   // we can omit the dmb ishst if this StoreCM is part of a volatile
2852   // put because in that case the put will be implemented by stlr
2853   //
2854   // we need to check for a normal subgraph feeding this StoreCM.
2855   // that means the StoreCM must be fed its Memory from a leading membar,
2856   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857   // leading membar must be part of a normal subgraph
2858 
2859   Node *x = storecm->in(StoreNode::Memory);
2860 
2861   if (!x->is_Proj()) {
2862     return false;
2863   }
2864 
2865   x = x->in(0);
2866 
2867   if (!x->is_MemBar()) {
2868     return false;
2869   }
2870 
2871   MemBarNode *leading = x->as_MemBar();
2872 
2873   // reject invalid candidates
2874   if (!leading_membar(leading)) {
2875     return false;
2876   }
2877 
2878   // we can omit the StoreStore if it is the head of a normal subgraph
2879   return (leading_to_normal(leading) != NULL);
2880 }
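
// for example, under -XX:+UseConcMarkSweepGC -XX:-UseCondCardMark
// an oop field put such as
//
//   obj.field = someRef;
//
// is followed by a card mark (StoreCM) and normally requires a dmb
// ishst between the two stores. when the put belongs to a volatile
// put subgraph it is emitted as stlr<x> instead which, as noted
// above, allows the StoreStore barrier to be omitted.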
2881 
2882 
2883 #define __ _masm.
2884 
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887 
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892 
2893 //=============================================================================
2894 
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 //       from the start of the call to the point where the return address


8787 // Note: storeIConditional is not used anywhere by AArch64.
8788 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8789 %{
8790   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8791 
8792   ins_cost(VOLATILE_REF_COST);
8793 
8794   format %{
8795     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8796     "cmpw rscratch1, zr\t# EQ on successful write"
8797   %}
8798 
8799   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8800 
8801   ins_pipe(pipe_slow);
8802 %}
8803 
8804 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8805 // can't match them
8806 
8807 // standard CompareAndSwapX when we are using barriers
8808 // these have higher priority than the rules selected by a predicate
8809 
8810 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8811 
8812   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8813   ins_cost(2 * VOLATILE_REF_COST);
8814 
8815   effect(KILL cr);
8816 
8817  format %{
8818     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8819     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8820  %}
8821 
8822  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8823             aarch64_enc_cset_eq(res));
8824 
8825   ins_pipe(pipe_slow);
8826 %}
8827 
8828 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8829 
8830   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8831   ins_cost(2 * VOLATILE_REF_COST);
8832 
8833   effect(KILL cr);
8834 
8835  format %{
8836     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8837     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8838  %}
8839 
8840  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8841             aarch64_enc_cset_eq(res));
8842 
8843   ins_pipe(pipe_slow);
8844 %}
8845 
8846 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8847 
8848   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8849   ins_cost(2 * VOLATILE_REF_COST);
8850 
8851   effect(KILL cr);
8852 
8853  format %{
8854     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8855     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8856  %}
8857 
8858  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8859             aarch64_enc_cset_eq(res));
8860 
8861   ins_pipe(pipe_slow);
8862 %}
8863 
8864 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8865 
8866   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8867   ins_cost(2 * VOLATILE_REF_COST);
8868 
8869   effect(KILL cr);
8870 
8871  format %{
8872     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8873     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8874  %}
8875 
8876  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8877             aarch64_enc_cset_eq(res));
8878 
8879   ins_pipe(pipe_slow);
8880 %}
8881 
8882 // alternative CompareAndSwapX when we are eliding barriers
8883 
8884 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8885 
8886   predicate(needs_acquiring_load_exclusive(n));
8887   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8888   ins_cost(VOLATILE_REF_COST);
8889 
8890   effect(KILL cr);
8891 
8892  format %{
8893     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8894     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8895  %}
8896 
8897  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8898             aarch64_enc_cset_eq(res));
8899 
8900   ins_pipe(pipe_slow);
8901 %}
8902 
8903 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8904 
8905   predicate(needs_acquiring_load_exclusive(n));
8906   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8907   ins_cost(VOLATILE_REF_COST);
8908 
8909   effect(KILL cr);
8910 
8911  format %{
8912     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8913     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8914  %}
8915 
8916  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8917             aarch64_enc_cset_eq(res));
8918 
8919   ins_pipe(pipe_slow);
8920 %}
8921 
8922 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8923 
8924   predicate(needs_acquiring_load_exclusive(n));
8925   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8926   ins_cost(VOLATILE_REF_COST);
8927 
8928   effect(KILL cr);
8929 
8930  format %{
8931     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8932     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8933  %}
8934 
8935  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8936             aarch64_enc_cset_eq(res));
8937 
8938   ins_pipe(pipe_slow);
8939 %}
8940 
8941 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8942 
8943   predicate(needs_acquiring_load_exclusive(n));
8944   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8945   ins_cost(VOLATILE_REF_COST);
8946 
8947   effect(KILL cr);
8948 
8949  format %{
8950     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8951     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8952  %}
8953 
8954  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8955             aarch64_enc_cset_eq(res));
8956 
8957   ins_pipe(pipe_slow);
8958 %}
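
// as a rough sketch only (the precise sequence is determined by the
// MacroAssembler cmpxchg implementations and the register choices
// here are purely illustrative), the _acq rules differ from the
// plain rules above in using an acquiring exclusive load, e.g. for
// the word case something like
//
//   retry:
//     ldaxr   w8, [mem]            // acquiring exclusive load
//     cmp     w8, w_oldval
//     b.ne    done
//     stlxr   w9, w_newval, [mem]
//     cbnz    w9, retry
//   done:
//
// with no leading or trailing dmb ish around the loop.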
8959 
8960 
8961 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
8962   match(Set prev (GetAndSetI mem newv));
8963   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
8964   ins_encode %{
8965     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8966   %}
8967   ins_pipe(pipe_serial);
8968 %}
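
// get_and_setX implements the unsafe get-and-set intrinsics. as an
// illustrative Java-level trigger (assuming the usual intrinsic
// inlining)
//
//   AtomicInteger a = new AtomicInteger();
//   int old = a.getAndSet(42);     // matches GetAndSetI
//
// the exchange itself is delegated to MacroAssembler::atomic_xchgw
// as encoded above.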
8969 
8970 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
8971   match(Set prev (GetAndSetL mem newv));
8972   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8973   ins_encode %{
8974     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));

