1022 static int emit_exception_handler(CodeBuffer &cbuf);
1023 static int emit_deopt_handler(CodeBuffer& cbuf);
1024
1025 static uint size_exception_handler() {
1026 return MacroAssembler::far_branch_size();
1027 }
1028
1029 static uint size_deopt_handler() {
1030 // count one adr and one far branch instruction
1031 return 4 * NativeInstruction::instruction_size;
1032 }
1033 };
1034
1035 // graph traversal helpers
1036
1037 MemBarNode *parent_membar(const Node *n);
1038 MemBarNode *child_membar(const MemBarNode *n);
1039 bool leading_membar(const MemBarNode *barrier);
1040
1041 bool is_card_mark_membar(const MemBarNode *barrier);
1042
1043 MemBarNode *leading_to_normal(MemBarNode *leading);
1044 MemBarNode *normal_to_leading(const MemBarNode *barrier);
1045 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1046 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1047 MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1048
1049 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1050
1051 bool unnecessary_acquire(const Node *barrier);
1052 bool needs_acquiring_load(const Node *load);
1053
1054 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1055
1056 bool unnecessary_release(const Node *barrier);
1057 bool unnecessary_volatile(const Node *barrier);
1058 bool needs_releasing_store(const Node *store);
1059
1060 // predicate controlling translation of StoreCM
1061 bool unnecessary_storestore(const Node *storecm);
1062 %}
1063
1064 source %{
1065
1066 // Optimization of volatile gets and puts
1067 // -------------------------------------
1068 //
1069 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1070 // use to implement volatile reads and writes. For a volatile read
1071 // we simply need
1072 //
1073 // ldar<x>
1074 //
1075 // and for a volatile write we need
1076 //
1077 // stlr<x>
1078 //
1079 // Alternatively, we can implement them by pairing a normal
1080 // load/store with a memory barrier. For a volatile read we need
1081 //
1082 // ldr<x>
1083 // dmb ishld
1084 //
1085 // for a volatile write
1086 //
1087 // dmb ish
1088 // str<x>
1089 // dmb ish
1090 //
1091 // In order to generate the desired instruction sequence we need to
1092 // be able to identify specific 'signature' ideal graph node
1093 // sequences which i) occur as a translation of volatile reads or
1094 // writes and ii) do not occur through any other translation or
1095 // graph transformation. We can then provide alternative adlc
1096 // matching rules which translate these node sequences to the
1097 // desired machine code sequences. Selection of the alternative
1098 // rules can be implemented by predicates which identify the
1099 // relevant node sequences.
1100 //
1101 // The ideal graph generator translates a volatile read to the node
1102 // sequence
1103 //
1104 // LoadX[mo_acquire]
1105 // MemBarAcquire
1106 //
1107 // As a special case when using the compressed oops optimization we
1108 // may also see this variant
1109 //
1110 // LoadN[mo_acquire]
1111 // DecodeN
1112 // MemBarAcquire
1113 //
1114 // A volatile write is translated to the node sequence
1115 //
1116 // MemBarRelease
1117 // StoreX[mo_release] {CardMark}-optional
1118 // MemBarVolatile
1119 //
1146 // predicates need to detect its presence in order to correctly
1147 // select the desired adlc rules.
1148 //
1149 // Inlined unsafe volatile gets manifest as a somewhat different
1150 // node sequence to a normal volatile get
1151 //
1152 // MemBarCPUOrder
1153 // || \\
1154 // MemBarAcquire LoadX[mo_acquire]
1155 // ||
1156 // MemBarCPUOrder
1157 //
1158 // In this case the acquire membar does not directly depend on the
1159 // load. However, we can be sure that the load is generated from an
1160 // inlined unsafe volatile get if we see it dependent on this unique
1161 // sequence of membar nodes. Similarly, given an acquire membar we
1162 // can know that it was added because of an inlined unsafe volatile
1163 // get if it is fed and feeds a cpuorder membar and if its feed
1164 // membar also feeds an acquiring load.
1165 //
1166 // So, where we can identify these volatile read and write
1167 // signatures we can choose to plant either of the above two code
1168 // sequences. For a volatile read we can simply plant a normal
1169 // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1170 // also choose to inhibit translation of the MemBarAcquire and
1171 // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1172 //
1173 // When we recognise a volatile store signature we can choose to
1174 // plant a dmb ish as a translation for the MemBarRelease, a
1175 // normal str<x> and then a dmb ish for the MemBarVolatile.
1176 // Alternatively, we can inhibit translation of the MemBarRelease
1177 // and MemBarVolatile and instead plant a simple stlr<x>
1178 // instruction.
1179 //
1180 // Of course, the above only applies when we see these signature
1181 // configurations. We still want to plant dmb instructions in any
1182 // other cases where we may see a MemBarAcquire, MemBarRelease or
1183 // MemBarVolatile. For example, at the end of a constructor which
1184 // writes final/volatile fields we will see a MemBarRelease
1185 // instruction and this needs a 'dmb ish' lest we risk the
1186 // constructed object being visible without making the
1187 // final/volatile field writes visible.
1188 //
1189 // n.b. the translation rules below which rely on detection of the
1190 // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1191 // If we see anything other than the signature configurations we
1192 // always just translate the loads and stores to ldr<x> and str<x>
1193 // and translate acquire, release and volatile membars to the
1194 // relevant dmb instructions.
1195 //
1196
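// As an illustration only -- the rule names, operands and encoding
// names in this sketch are assumptions and need not match the rules
// actually defined later in this file -- the predicates above might be
// consumed by alternative adlc rules along the following lines, with
// the default ldr<x>/str<x> rules guarded by the complementary tests:
//
//   instruct loadI_volatile(iRegINoSp dst, indirect mem)
//   %{
//     match(Set dst (LoadI mem));
//     predicate(needs_acquiring_load(n));
//     ins_cost(VOLATILE_REF_COST);
//     format %{ "ldarw  $dst, $mem\t# int" %}
//     ins_encode(aarch64_enc_ldarw(dst, mem));
//     ins_pipe(pipe_serial);
//   %}
//
//   instruct storeI_volatile(iRegIorL2I src, indirect mem)
//   %{
//     match(Set mem (StoreI mem src));
//     predicate(needs_releasing_store(n));
//     ins_cost(VOLATILE_REF_COST);
//     format %{ "stlrw  $src, $mem\t# int" %}
//     ins_encode(aarch64_enc_stlrw(src, mem));
//     ins_pipe(pipe_serial);
//   %}
//
// while unnecessary_acquire, unnecessary_release and
// unnecessary_volatile allow the corresponding MemBarAcquire,
// MemBarRelease and MemBarVolatile rules to match without emitting
// any dmb.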
1197 // graph traversal helpers used for volatile put/get optimization
1198
1199 // 1) general purpose helpers
1200
1201 // if node n is linked to a parent MemBarNode by an intervening
1202 // Control and Memory ProjNode return the MemBarNode otherwise return
1203 // NULL.
1204 //
1205 // n may only be a Load or a MemBar.
1206
1207 MemBarNode *parent_membar(const Node *n)
1208 {
1209 Node *ctl = NULL;
1210 Node *mem = NULL;
1211 Node *membar = NULL;
1212
1213 if (n->is_Load()) {
1214 ctl = n->lookup(LoadNode::Control);
1215 mem = n->lookup(LoadNode::Memory);
1216 } else if (n->is_MemBar()) {
1217 ctl = n->lookup(TypeFunc::Control);
1218 mem = n->lookup(TypeFunc::Memory);
1219 } else {
1220 return NULL;
1221 }
1222
1223 if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1224 return NULL;
1225
1226 membar = ctl->lookup(0);
1227
1228 if (!membar || !membar->is_MemBar())
1229 return NULL;
1230
1231 if (mem->lookup(0) != membar)
1232 return NULL;
1233
1234 return membar->as_MemBar();
1235 }
1236
1237 // if n is linked to a child MemBarNode by intervening Control and
1238 // Memory ProjNodes return the MemBarNode otherwise return NULL.
1239
1240 MemBarNode *child_membar(const MemBarNode *n)
1241 {
1242 ProjNode *ctl = n->proj_out(TypeFunc::Control);
1243 ProjNode *mem = n->proj_out(TypeFunc::Memory);
1244
1245 // MemBar needs to have both a Ctl and Mem projection
1246 if (! ctl || ! mem)
1247 return NULL;
1248
1249 MemBarNode *child = NULL;
1250 Node *x;
1251
1252 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1253 x = ctl->fast_out(i);
1254 // if we see a membar we keep hold of it. we may also see a new
1255 // arena copy of the original but it will appear later
1256 if (x->is_MemBar()) {
1257 child = x->as_MemBar();
1258 break;
1259 }
1260 }
1261
1262 if (child == NULL)
1263 return NULL;
1264
1265 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1266 x = mem->fast_out(i);
1267 // if we see the same membar fed by the Mem projection as well
1268 // then it is our child on both the Ctl and Mem paths
1269 if (x == child) {
1270 return child;
1271 }
1272 }
1273 return NULL;
1274 }
1275
1276 // helper predicate used to filter candidates for a leading memory
1277 // barrier
1278 //
1279 // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1280 // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1281
1282 bool leading_membar(const MemBarNode *barrier)
1283 {
1284 int opcode = barrier->Opcode();
1285 // if this is a release membar we are ok
1286 if (opcode == Op_MemBarRelease)
1287 return true;
1288 // if its a cpuorder membar . . .
1289 if (opcode != Op_MemBarCPUOrder)
1290 return false;
1291 // then the parent has to be a release membar
1292 MemBarNode *parent = parent_membar(barrier);
1293 if (!parent)
1294 return false;
1295 opcode = parent->Opcode();
1296 return opcode == Op_MemBarRelease;
1297 }
1298
1299 // 2) card mark detection helper
1300
1301 // helper predicate which can be used to detect a volatile membar
1302 // introduced as part of a conditional card mark sequence either by
1303 // G1 or by CMS when UseCondCardMark is true.
1304 //
1305 // membar can be definitively determined to be part of a card mark
1306 // sequence if and only if all the following hold
1307 //
1308 // i) it is a MemBarVolatile
1309 //
1310 // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1311 // true
1312 //
1313 // iii) the node's Mem projection feeds a StoreCM node.
1314
1315 bool is_card_mark_membar(const MemBarNode *barrier)
1316 {
1317 if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark))
1318 return false;
1319
1320 if (barrier->Opcode() != Op_MemBarVolatile)
1321 return false;
1322
1323 ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1324
1325 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1326 Node *y = mem->fast_out(i);
1327 if (y->Opcode() == Op_StoreCM) {
1328 return true;
1329 }
1330 }
1331
1332 return false;
1333 }
1334
1335
1336 // 3) helper predicates to traverse volatile put graphs which may
1337 // contain GC barrier subgraphs
1338
1339 // Preamble
1340 // --------
1341 //
1342 // for volatile writes we can omit generating barriers and employ a
1343 // releasing store when we see a node sequence with a
1344 // leading MemBarRelease and a trailing MemBarVolatile as follows
1345 //
1346 // MemBarRelease
1347 // { || } -- optional
1348 // {MemBarCPUOrder}
1349 // || \\
1350 // || StoreX[mo_release]
1351 // | \ /
1352 // | MergeMem
1353 // | /
1354 // MemBarVolatile
1355 //
1356 // where
1357 // || and \\ represent Ctl and Mem feeds via Proj nodes
1387 // ordering is required for both non-volatile and volatile
1388 // puts. Normally that means we need to translate a StoreCM using
1389 // the sequence
1390 //
1391 // dmb ishst
1392 // stlrb
1393 //
1394 // However, in the case of a volatile put if we can recognise this
1395 // configuration and plant an stlr for the object write then we can
1396 // omit the dmb and just plant an strb since visibility of the stlr
1397 // is ordered before visibility of subsequent stores. StoreCM nodes
1398 // also arise when using G1 or using CMS with conditional card
1399 // marking. In these cases (as we shall see) we don't need to insert
1400 // the dmb when translating StoreCM because there is already an
1401 // intervening StoreLoad barrier between it and the StoreP/N.
1402 //
1403 // It is also possible to perform the card mark conditionally on it
1404 // currently being unmarked in which case the volatile put graph
1405 // will look slightly different
1406 //
1407 // MemBarRelease
1408 // MemBarCPUOrder___________________________________________
1409 // || \\ Ctl \ Ctl \ \\ Mem \
1410 // || StoreN/P[mo_release] CastP2X If LoadB |
1411 // | \ / \ |
1412 // | MergeMem . . . StoreB
1413 // | / /
1414 // || /
1415 // MemBarVolatile
1416 //
1417 // It is worth noting at this stage that both the above
1418 // configurations can be uniquely identified by checking that the
1419 // memory flow includes the following subgraph:
1420 //
1421 // MemBarRelease
1422 // MemBarCPUOrder
1423 // | \ . . .
1424 // | StoreX[mo_release] . . .
1425 // | /
1426 // MergeMem
1427 // |
1428 // MemBarVolatile
1429 //
1430 // This is referred to as a *normal* subgraph. It can easily be
1431 // detected starting from any candidate MemBarRelease,
1432 // StoreX[mo_release] or MemBarVolatile.
1433 //
1434 // the code below uses two helper predicates, leading_to_normal and
1435 // normal_to_leading to identify this configuration, one validating
1436 // the layout starting from the top membar and searching down and
1437 // the other validating the layout starting from the lower membar
1438 // and searching up.
1439 //
1440 // There are two special case GC configurations when a normal graph
1441 // may not be generated: when using G1 (which always employs a
1442 // conditional card mark); and when using CMS with conditional card
1443 // marking configured. These GCs are both concurrent rather than
1444 // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1445 // graph between the leading and trailing membar nodes, in
1446 // particular enforcing stronger memory serialisation between the
1447 // object put and the corresponding conditional card mark. CMS
1448 // employs a post-write GC barrier while G1 employs both a pre- and
1449 // post-write GC barrier. Of course the extra nodes may be absent --
1450 // they are only inserted for object puts. This significantly
1451 // complicates the task of identifying whether a MemBarRelease,
1452 // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1453 // when using these GC configurations (see below).
1454 //
1455 // In both cases the post-write subtree includes an auxiliary
1456 // MemBarVolatile (StoreLoad barrier) separating the object put and
1457 // the read of the corresponding card. This poses two additional
1458 // problems.
1459 //
1460 // Firstly, a card mark MemBarVolatile needs to be distinguished
1461 // from a normal trailing MemBarVolatile. Resolving this first
1462 // problem is straightforward: a card mark MemBarVolatile always
1463 // projects a Mem feed to a StoreCM node and that is a unique marker
1464 //
1465 // MemBarVolatile (card mark)
1466 // C | \ . . .
1467 // | StoreCM . . .
1468 // . . .
1469 //
1470 // The second problem is how the code generator should translate the
1471 // card mark barrier. It always needs to be translated to a "dmb
1472 // ish" instruction whether or not it occurs as part of a volatile
1473 // put. A StoreLoad barrier is needed after the object put to ensure
1474 // i) visibility to GC threads of the object put and ii) visibility
1475 // to the mutator thread of any card clearing write by a GC
1476 // thread. Clearly a normal store (str) will not guarantee this
1477 // ordering but neither will a releasing store (stlr). The latter
1478 // guarantees that the object put is visible but does not guarantee
1479 // that writes by other threads have also been observed.
1480 //
1481 // So, returning to the task of translating the object put and the
1482 // leading/trailing membar nodes: what do the non-normal node graphs
1483 // look like for these 2 special cases? And how can we determine the
1484 // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1485 // in both normal and non-normal cases?
1486 //
1487 // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1488 // which selects conditional execution based on the value loaded
1489 // (LoadB) from the card. Ctl and Mem are fed to the If via an
1490 // intervening StoreLoad barrier (MemBarVolatile).
1491 //
1492 // So, with CMS we may see a node graph which looks like this
1493 //
1494 // MemBarRelease
1495 // MemBarCPUOrder_(leading)__________________
1496 // C | M \ \\ C \
1497 // | \ StoreN/P[mo_release] CastP2X
1498 // | Bot \ /
1499 // | MergeMem
1500 // | /
1501 // MemBarVolatile (card mark)
1502 // C | || M |
1503 // | LoadB |
1504 // | | |
1505 // | Cmp |\
1506 // | / | \
1507 // If | \
1508 // | \ | \
1509 // IfFalse IfTrue | \
1510 // \ / \ | \
1511 // \ / StoreCM |
1512 // \ / | |
1513 // Region . . . |
1514 // | \ /
1515 // | . . . \ / Bot
1516 // | MergeMem
1517 // | |
1518 // MemBarVolatile (trailing)
1519 //
1520 // The first MergeMem merges the AliasIdxBot Mem slice from the
1521 // leading membar and the oopptr Mem slice from the Store into the
1522 // card mark membar. The trailing MergeMem merges the AliasIdxBot
1523 // Mem slice from the card mark membar and the AliasIdxRaw slice
1524 // from the StoreCM into the trailing membar (n.b. the latter
1525 // proceeds via a Phi associated with the If region).
1526 //
1527 // G1 is quite a lot more complicated. The nodes inserted on behalf
1528 // of G1 may comprise: a pre-write graph which adds the old value to
1529 // the SATB queue; the releasing store itself; and, finally, a
1530 // post-write graph which performs a card mark.
1531 //
1532 // The pre-write graph may be omitted, but only when the put is
1533 // writing to a newly allocated (young gen) object and then only if
1534 // there is a direct memory chain to the Initialize node for the
1535 // object allocation. This will not happen for a volatile put since
1536 // any memory chain passes through the leading membar.
1537 //
1538 // The pre-write graph includes a series of 3 If tests. The outermost
1539 // If tests whether SATB is enabled (no else case). The next If tests
1540 // whether the old value is non-NULL (no else case). The third tests
1541 // whether the SATB queue index is > 0, if so updating the queue. The
1542 // else case for this third If calls out to the runtime to allocate a
1543 // new queue buffer.
1544 //
1545 // So with G1 the pre-write and releasing store subgraph looks like
1546 // this (the nested Ifs are omitted).
1558 // | \ |
1559 // | . . . \ |
1560 // | / | / | |
1561 // Region Phi[M] | |
1562 // | \ | | |
1563 // | \_____ | ___ | |
1564 // C | C \ | C \ M | |
1565 // | CastP2X | StoreN/P[mo_release] |
1566 // | | | |
1567 // C | M | M | M |
1568 // \ | | /
1569 // . . .
1570 // (post write subtree elided)
1571 // . . .
1572 // C \ M /
1573 // MemBarVolatile (trailing)
1574 //
1575 // n.b. the LoadB in this subgraph is not the card read -- it's a
1576 // read of the SATB queue active flag.
1577 //
1578 // The G1 post-write subtree is also optional, this time when the
1579 // new value being written is either null or can be identified as a
1580 // newly allocated (young gen) object with no intervening control
1581 // flow. The latter cannot happen but the former may, in which case
1582 // the card mark membar is omitted and the memory feeds from the
1583 // leading membar and the StoreN/P are merged direct into the
1584 // trailing membar as per the normal subgraph. So, the only special
1585 // case which arises is when the post-write subgraph is generated.
1586 //
1587 // The kernel of the post-write G1 subgraph is the card mark itself
1588 // which includes a card mark memory barrier (MemBarVolatile), a
1589 // card test (LoadB), and a conditional update (If feeding a
1590 // StoreCM). These nodes are surrounded by a series of nested Ifs
1591 // which try to avoid doing the card mark. The top level If skips if
1592 // the object reference does not cross regions (i.e. it tests if
1593 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1594 // need not be recorded. The next If, which skips on a NULL value,
1595 // may be absent (it is not generated if the type of value is >=
1596 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1597 // checking if card_val != young). n.b. although this test requires
1598 // a pre-read of the card it can safely be done before the StoreLoad
1599 // barrier. However that does not bypass the need to reread the card
1600 // after the barrier.
1601 //
1602 // (pre-write subtree elided)
1603 // . . . . . . . . . . . .
1651 // \ MergeMem
1652 // \ /
1653 // MemBarVolatile
1654 //
1655 // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1656 // from the leading membar and the oopptr Mem slice from the Store
1657 // into the card mark membar i.e. the memory flow to the card mark
1658 // membar still looks like a normal graph.
1659 //
1660 // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1661 // Mem slices (from the StoreCM and other card mark queue stores).
1662 // However in this case the AliasIdxBot Mem slice does not come
1663 // direct from the card mark membar. It is merged through a series
1664 // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1665 // from the leading membar with the Mem feed from the card mark
1666 // membar. Each Phi corresponds to one of the Ifs which may skip
1667 // around the card mark membar. So when the If implementing the NULL
1668 // value check has been elided the total number of Phis is 2
1669 // otherwise it is 3.
1670 //
1671 // So, the upshot is that in all cases the volatile put graph will
1672 // include a *normal* memory subgraph between the leading membar and
1673 // its child membar. When that child is not a card mark membar then
1674 // it marks the end of a volatile put subgraph. If the child is a
1675 // card mark membar then the normal subgraph will form part of a
1676 // volatile put subgraph if and only if the child feeds an
1677 // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That
1678 // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging
1679 // the leading barrier memory flow (for G1).
1680 //
1681 // The predicates controlling generation of instructions for store
1682 // and barrier nodes employ a few simple helper functions (described
1683 // below) which identify the presence or absence of these subgraph
1684 // configurations and provide a means of traversing from one node in
1685 // the subgraph to another.
1686
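// The following debug-only sketch is not part of the original code; it
// merely illustrates the property described above, namely that a
// normal subgraph can be recognised equally well from either end
// (assuming the helpers defined below behave as documented).
//
// #ifdef ASSERT
// static void verify_normal_subgraph(MemBarNode *leading)
// {
//   // walk down from the leading membar via the Mem flow to the
//   // membar fed by the MergeMem (either the trailing membar or a
//   // card mark membar) . . .
//   MemBarNode *lower = leading_to_normal(leading);
//   if (lower != NULL) {
//     // . . . then walk back up via the AliasIdxBot slice; for a
//     // well-formed normal subgraph we should arrive back at the
//     // membar we started from
//     assert(normal_to_leading(lower) == leading,
//            "normal subgraph should round-trip");
//   }
// }
// #endif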
1687 // leading_to_normal
1688 //
1689 // graph traversal helper which detects the normal case Mem feed
1690 // from a release membar (or, optionally, its cpuorder child) to a
1691 // dependent volatile membar i.e. it ensures that the following Mem
1692 // flow subgraph is present.
1693 //
1694 // MemBarRelease
1695 // MemBarCPUOrder
1696 // | \ . . .
1697 // | StoreN/P[mo_release] . . .
1698 // | /
1699 // MergeMem
1700 // |
1701 // MemBarVolatile
1702 //
1703 // if the correct configuration is present returns the volatile
1704 // membar otherwise NULL.
1705 //
1706 // the input membar is expected to be either a cpuorder membar or a
1707 // release membar. in the latter case it should not have a cpuorder membar
1708 // child.
1709 //
1710 // the returned membar may be a card mark membar rather than a
1711 // trailing membar.
1712
1713 MemBarNode *leading_to_normal(MemBarNode *leading)
1714 {
1715 assert((leading->Opcode() == Op_MemBarRelease ||
1716 leading->Opcode() == Op_MemBarCPUOrder),
1717 "expecting a volatile or cpuroder membar!");
1718
1719 // check the mem flow
1720 ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1721
1722 if (!mem)
1723 return NULL;
1724
1725 Node *x = NULL;
1726 StoreNode * st = NULL;
1727 MergeMemNode *mm = NULL;
1728
1729 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1730 x = mem->fast_out(i);
1731 if (x->is_MergeMem()) {
1732 // two merge mems is one too many
1733 if (mm != NULL)
1734 return NULL;
1735 mm = x->as_MergeMem();
1736 } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1737 // two releasing stores is one too many
1738 if (st != NULL)
1739 return NULL;
1740 st = x->as_Store();
1741 }
1742 }
1743
1744 if (!mm || !st)
1745 return NULL;
1746
1747 bool found = false;
1748 // ensure the store feeds the merge
1749 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1750 if (st->fast_out(i) == mm) {
1751 found = true;
1752 break;
1753 }
1754 }
1755
1756 if (!found)
1757 return NULL;
1758
1759 MemBarNode *mbvol = NULL;
1760 // ensure the merge feeds a volatile membar
1761 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1762 x = mm->fast_out(i);
1763 if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1764 mbvol = x->as_MemBar();
1765 break;
1766 }
1767 }
1768
1769 return mbvol;
1770 }
1771
1772 // normal_to_leading
1773 //
1774 // graph traversal helper which detects the normal case Mem feed
1775 // from either a card mark or a trailing membar to a preceding
1776 // release membar (optionally its cpuorder child) i.e. it ensures
1777 // that the following Mem flow subgraph is present.
1778 //
1779 // MemBarRelease
1780 // MemBarCPUOrder {leading}
1781 // | \ . . .
1782 // | StoreN/P[mo_release] . . .
1783 // | /
1784 // MergeMem
1785 // |
1786 // MemBarVolatile
1787 //
1788 // this predicate checks for the same flow as the previous predicate
1789 // but starting from the bottom rather than the top.
1790 //
1791 // if the configuration is present returns the cpuorder membar for
1792 // preference or, when absent, the release membar; otherwise NULL.
1793 //
1794 // n.b. the input membar is expected to be a MemBarVolatile but
1795 // need not be a card mark membar.
1796
1797 MemBarNode *normal_to_leading(const MemBarNode *barrier)
1798 {
1799 // input must be a volatile membar
1800 assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar");
1801 Node *x;
1802
1803 // the Mem feed to the membar should be a merge
1804 x = barrier->in(TypeFunc::Memory);
1805 if (!x->is_MergeMem())
1806 return NULL;
1807
1808 MergeMemNode *mm = x->as_MergeMem();
1809
1810 // the AliasIdxBot slice should be another MemBar projection
1811 x = mm->in(Compile::AliasIdxBot);
1812 // ensure this is a non control projection
1813 if (!x->is_Proj() || x->is_CFG())
1814 return NULL;
1815 // if it is fed by a membar that's the one we want
1816 x = x->in(0);
1817
1818 if (!x->is_MemBar())
1819 return NULL;
1820
1821 MemBarNode *leading = x->as_MemBar();
1822 // reject invalid candidates
1823 if (!leading_membar(leading))
1824 return NULL;
1825
1826 // ok, we have a valid leading membar, now for the sanity clauses
1827
1828 // the leading membar must feed Mem to a releasing store
1829 ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1830 StoreNode *st = NULL;
1831 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1832 x = mem->fast_out(i);
1833 if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1834 st = x->as_Store();
1835 break;
1836 }
1837 }
1838 if (st == NULL)
1839 return NULL;
1840
1841 // the releasing store has to feed the same merge
1842 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1843 if (st->fast_out(i) == mm)
1844 return leading;
1845 }
1846
1847 return NULL;
1848 }
1849
1850 // card_mark_to_trailing
1851 //
1852 // graph traversal helper which detects extra, non-normal Mem feed
1853 // from a card mark volatile membar to a trailing membar i.e. it
1854 // ensures that one of the following three GC post-write Mem flow
1855 // subgraphs is present.
1856 //
1857 // 1)
1858 // . . .
1859 // |
1860 // MemBarVolatile (card mark)
1861 // | |
1862 // | StoreCM
1863 // | |
1864 // | . . .
1865 // Bot | /
1866 // MergeMem
1867 // |
1868 // MemBarVolatile (trailing)
1869 //
1870 //
1871 // 2)
1872 // MemBarRelease/CPUOrder (leading)
1873 // |
1874 // |
1875 // |\ . . .
1876 // | \ |
1877 // | \ MemBarVolatile (card mark)
1878 // | \ | |
1879 // \ \ | StoreCM . . .
1880 // \ \ |
1881 // \ Phi
1882 // \ /
1883 // Phi . . .
1884 // Bot | /
1885 // MergeMem
1886 // |
1887 // MemBarVolatile (trailing)
1888 //
1889 // 3)
1890 // MemBarRelease/CPUOrder (leading)
1891 // |
1892 // |\
1893 // | \
1894 // | \ . . .
1895 // | \ |
1896 // |\ \ MemBarVolatile (card mark)
1897 // | \ \ | |
1898 // | \ \ | StoreCM . . .
1899 // | \ \ |
1900 // \ \ Phi
1901 // \ \ /
1902 // \ Phi
1903 // \ /
1904 // Phi . . .
1905 // Bot | /
1906 // MergeMem
1907 // |
1908 // MemBarVolatile (trailing)
1909 //
1910 // configuration 1 is only valid if UseConcMarkSweepGC &&
1911 // UseCondCardMark
1912 //
1913 // configurations 2 and 3 are only valid if UseG1GC.
1914 //
1915 // if a valid configuration is present returns the trailing membar
1916 // otherwise NULL.
1917 //
1918 // n.b. the supplied membar is expected to be a card mark
1919 // MemBarVolatile i.e. the caller must ensure the input node has the
1920 // correct opcode and feeds Mem to a StoreCM node
1921
1922 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
1923 {
1924 // input must be a card mark volatile membar
1925 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
1926
1927 Node *feed = barrier->proj_out(TypeFunc::Memory);
1928 Node *x;
1938 x = feed->fast_out(i);
1939 // we want a MergeMem fed directly by the barrier's Mem projection
1940 if (x->is_MergeMem()) {
1941 mm = x->as_MergeMem();
1942 break;
1943 }
1944 }
1945 if (mm) {
1946 retry_feed = false;
1947 } else if (UseG1GC && phicount++ < MAX_PHIS) {
1948 // the barrier may feed indirectly via one or two Phi nodes
1949 PhiNode *phi = NULL;
1950 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
1951 x = feed->fast_out(i);
1952 // the correct Phi will be merging a Bot memory slice
1953 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
1954 phi = x->as_Phi();
1955 break;
1956 }
1957 }
1958 if (!phi)
1959 return NULL;
1960 // look for another merge below this phi
1961 feed = phi;
1962 } else {
1963 // couldn't find a merge
1964 return NULL;
1965 }
1966 }
1967
1968 // sanity check this feed turns up as the expected slice
1969 assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
1970
1971 MemBarNode *trailing = NULL;
1972 // be sure we have a volatile membar below the merge
1973 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1974 x = mm->fast_out(i);
1975 if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1976 trailing = x->as_MemBar();
1977 break;
1978 }
1979 }
1980
1981 return trailing;
1982 }
1983
1984 // trailing_to_card_mark
1985 //
1986 // graph traversal helper which detects extra, non-normal Mem feed
1987 // from a trailing membar to a preceding card mark volatile membar
1988 // i.e. it identifies whether one of the three possible extra GC
1989 // post-write Mem flow subgraphs is present
1990 //
1991 // this predicate checks for the same flow as the previous predicate
1992 // but starting from the bottom rather than the top.
1993 //
1994 // if the configuration is present returns the card mark membar
1995 // otherwise NULL
1996
1997 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
1998 {
1999 assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
2000
2001 Node *x = trailing->in(TypeFunc::Memory);
2002 // the Mem feed to the membar should be a merge
2003 if (!x->is_MergeMem())
2004 return NULL;
2005
2006 MergeMemNode *mm = x->as_MergeMem();
2007
2008 x = mm->in(Compile::AliasIdxBot);
2009 // with G1 we may possibly see a Phi or two before we see a Memory
2010 // Proj from the card mark membar
2011
2012 const int MAX_PHIS = 3; // max phis we will search through
2013 int phicount = 0; // current search count
2014
2015 bool retry_feed = !x->is_Proj();
2016
2017 while (retry_feed) {
2018 if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2019 PhiNode *phi = x->as_Phi();
2020 ProjNode *proj = NULL;
2021 PhiNode *nextphi = NULL;
2022 bool found_leading = false;
2023 for (uint i = 1; i < phi->req(); i++) {
2024 x = phi->in(i);
2037 }
2038 // if we found a correct looking proj then retry from there
2039 // otherwise we must see a leading membar and a phi or this is the
2040 // wrong config
2041 if (proj != NULL) {
2042 x = proj;
2043 retry_feed = false;
2044 } else if (found_leading && nextphi != NULL) {
2045 // retry from this phi to check phi2
2046 x = nextphi;
2047 } else {
2048 // not what we were looking for
2049 return NULL;
2050 }
2051 } else {
2052 return NULL;
2053 }
2054 }
2055 // the proj has to come from the card mark membar
2056 x = x->in(0);
2057 if (!x->is_MemBar())
2058 return NULL;
2059
2060 MemBarNode *card_mark_membar = x->as_MemBar();
2061
2062 if (!is_card_mark_membar(card_mark_membar))
2063 return NULL;
2064
2065 return card_mark_membar;
2066 }
2067
2068 // trailing_to_leading
2069 //
2070 // graph traversal helper which checks the Mem flow up the graph
2071 // from a (non-card mark) volatile membar attempting to locate and
2072 // return an associated leading membar. it first looks for a
2073 // subgraph in the normal configuration (relying on helper
2074 // normal_to_leading). failing that it then looks for one of the
2075 // possible post-write card mark subgraphs linking the trailing node
2076 // to the card mark membar (relying on helper
2077 // trailing_to_card_mark), and then checks that the card mark membar
2078 // is fed by a leading membar (once again relying on auxiliary
2079 // predicate normal_to_leading).
2080 //
2081 // if the configuration is valid returns the cpuorder membar for
2082 // preference or, when absent, the release membar; otherwise NULL.
2083 //
2084 // n.b. the input membar is expected to be a volatile membar but
2085 // must *not* be a card mark membar.
2086
2087 MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2088 {
2089 assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
2090
2091 MemBarNode *leading = normal_to_leading(trailing);
2092
2093 if (leading)
2094 return leading;
2095
2096 MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2097
2098 if (!card_mark_membar)
2099 return NULL;
2100
2101 return normal_to_leading(card_mark_membar);
2102 }
2103
2104 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2105
2106 bool unnecessary_acquire(const Node *barrier)
2107 {
2108 // assert barrier->is_MemBar();
2109 if (UseBarriersForVolatile)
2110 // we need to plant a dmb
2111 return false;
2112
2113 // a volatile read derived from bytecode (or also from an inlined
2114 // SHA field read via LibraryCallKit::load_field_from_object)
2115 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2116 // with a bogus read dependency on its preceding load. so in those
2117 // cases we will find the load node at the PARMS offset of the
2118 // acquire membar. n.b. there may be an intervening DecodeN node.
2119 //
2120 // a volatile load derived from an inlined unsafe field access
2121 // manifests as a cpuorder membar with Ctl and Mem projections
2122 // feeding both an acquire membar and a LoadX[mo_acquire]. The
2123 // acquire then feeds another cpuorder membar via Ctl and Mem
2124 // projections. The load has no output dependency on these trailing
2125 // membars because subsequent nodes inserted into the graph take
2126 // their control feed from the final cpuorder membar meaning they
2127 // are all ordered after the load.
2128
2129 Node *x = barrier->lookup(TypeFunc::Parms);
2130 if (x) {
2131 // we are starting from an acquire and it has a fake dependency
2132 //
2133 // need to check for
2134 //
2135 // LoadX[mo_acquire]
2136 // { |1 }
2137 // {DecodeN}
2138 // |Parms
2139 // MemBarAcquire*
2140 //
2141 // where * tags node we were passed
2142 // and |k means input k
2143 if (x->is_DecodeNarrowPtr())
2144 x = x->in(1);
2145
2146 return (x->is_Load() && x->as_Load()->is_acquire());
2147 }
2148
2149 // now check for an unsafe volatile get
2150
2151 // need to check for
2152 //
2153 // MemBarCPUOrder
2154 // || \\
2155 // MemBarAcquire* LoadX[mo_acquire]
2156 // ||
2157 // MemBarCPUOrder
2158 //
2159 // where * tags node we were passed
2160 // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2161
2162 // check for a parent MemBarCPUOrder
2163 ProjNode *ctl;
2164 ProjNode *mem;
2165 MemBarNode *parent = parent_membar(barrier);
2166 if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2167 return false;
2168 ctl = parent->proj_out(TypeFunc::Control);
2169 mem = parent->proj_out(TypeFunc::Memory);
2170 if (!ctl || !mem)
2171 return false;
2172 // ensure the proj nodes both feed a LoadX[mo_acquire]
2173 LoadNode *ld = NULL;
2174 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2175 x = ctl->fast_out(i);
2176 // if we see a load we keep hold of it and stop searching
2177 if (x->is_Load()) {
2178 ld = x->as_Load();
2179 break;
2180 }
2181 }
2182 // it must be an acquiring load
2183 if (! ld || ! ld->is_acquire())
2184 return false;
2185 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2186 x = mem->fast_out(i);
2187 // if we see the same load we drop it and stop searching
2188 if (x == ld) {
2189 ld = NULL;
2190 break;
2191 }
2192 }
2193 // we must have dropped the load
2194 if (ld)
2195 return false;
2196 // check for a child cpuorder membar
2197 MemBarNode *child = child_membar(barrier->as_MemBar());
2198 if (!child || child->Opcode() != Op_MemBarCPUOrder)
2199 return false;
2200
2201 return true;
2202 }
2203
2204 bool needs_acquiring_load(const Node *n)
2205 {
2206 // assert n->is_Load();
2207 if (UseBarriersForVolatile)
2208 // we use a normal load and a dmb
2209 return false;
2210
2211 LoadNode *ld = n->as_Load();
2212
2213 if (!ld->is_acquire())
2214 return false;
2215
2216 // check if this load is feeding an acquire membar
2217 //
2218 // LoadX[mo_acquire]
2219 // { |1 }
2220 // {DecodeN}
2221 // |Parms
2222 // MemBarAcquire*
2223 //
2224 // where * tags node we were passed
2225 // and |k means input k
2226
2227 Node *start = ld;
2228 Node *mbacq = NULL;
2229
2230 // if we hit a DecodeNarrowPtr we reset the start node and restart
2231 // the search through the outputs
2232 restart:
2233
2234 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2244 }
2245
2246 if (mbacq) {
2247 return true;
2248 }
2249
2250 // now check for an unsafe volatile get
2251
2252 // check if the Ctl and Mem feeds come from a MemBarCPUOrder
2253 //
2254 // MemBarCPUOrder
2255 // || \\
2256 // MemBarAcquire* LoadX[mo_acquire]
2257 // ||
2258 // MemBarCPUOrder
2259
2260 MemBarNode *membar;
2261
2262 membar = parent_membar(ld);
2263
2264 if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
2265 return false;
2266
2267 // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2268
2269 membar = child_membar(membar);
2270
2271 if (!membar || membar->Opcode() != Op_MemBarAcquire)
2272 return false;
2273
2274 membar = child_membar(membar);
2275
2276 if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
2277 return false;
2278
2279 return true;
2280 }
2281
2282 bool unnecessary_release(const Node *n)
2283 {
2284 assert((n->is_MemBar() &&
2285 n->Opcode() == Op_MemBarRelease),
2286 "expecting a release membar");
2287
2288 if (UseBarriersForVolatile)
2289 // we need to plant a dmb
2290 return false;
2291
2292 // if there is a dependent CPUOrder barrier then use that as the
2293 // leading
2294
2295 MemBarNode *barrier = n->as_MemBar();
2296 // check for an intervening cpuorder membar
2297 MemBarNode *b = child_membar(barrier);
2298 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2299 // ok, so start the check from the dependent cpuorder barrier
2300 barrier = b;
2301 }
2302
2303 // must start with a normal feed
2304 MemBarNode *child_barrier = leading_to_normal(barrier);
2305
2306 if (!child_barrier)
2307 return false;
2308
2309 if (!is_card_mark_membar(child_barrier))
2310 // this is the trailing membar and we are done
2311 return true;
2312
2313 // must be sure this card mark feeds a trailing membar
2314 MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2315 return (trailing != NULL);
2316 }
2317
2318 bool unnecessary_volatile(const Node *n)
2319 {
2320 // assert n->is_MemBar();
2321 if (UseBarriersForVolatile)
2322 // we need to plant a dmb
2323 return false;
2324
2325 MemBarNode *mbvol = n->as_MemBar();
2326
2327 // first we check if this is part of a card mark. if so then we have
2328 // to generate a StoreLoad barrier
2329
2330 if (is_card_mark_membar(mbvol))
2331 return false;
2332
2333 // ok, if it's not a card mark then we still need to check if it is
2334 // a trailing membar of a volatile put graph.
2335
2336 return (trailing_to_leading(mbvol) != NULL);
2337 }
2338
2339 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2340
2341 bool needs_releasing_store(const Node *n)
2342 {
2343 // assert n->is_Store();
2344 if (UseBarriersForVolatile)
2345 // we use a normal store and dmb combination
2346 return false;
2347
2348 StoreNode *st = n->as_Store();
2349
2350 // the store must be marked as releasing
2351 if (!st->is_release())
2352 return false;
2353
2354 // the store must be fed by a membar
2355
2356 Node *x = st->lookup(StoreNode::Memory);
2357
2358 if (! x || !x->is_Proj())
2359 return false;
2360
2361 ProjNode *proj = x->as_Proj();
2362
2363 x = proj->lookup(0);
2364
2365 if (!x || !x->is_MemBar())
2366 return false;
2367
2368 MemBarNode *barrier = x->as_MemBar();
2369
2370 // if the barrier is a release membar or a cpuorder membar fed by a
2371 // release membar then we need to check whether that forms part of a
2372 // volatile put graph.
2373
2374 // reject invalid candidates
2375 if (!leading_membar(barrier))
2376 return false;
2377
2378 // does this lead a normal subgraph?
2379 MemBarNode *mbvol = leading_to_normal(barrier);
2380
2381 if (!mbvol)
2382 return false;
2383
2384 // all done unless this is a card mark
2385 if (!is_card_mark_membar(mbvol))
2386 return true;
2387
2388 // we found a card mark -- just make sure we have a trailing barrier
2389
2390 return (card_mark_to_trailing(mbvol) != NULL);
2391 }
2392
2393 // predicate controlling translation of StoreCM
2394 //
2395 // returns true if the StoreStore barrier normally needed before the
2396 // card write can be omitted otherwise false
2397
2398 bool unnecessary_storestore(const Node *storecm)
2399 {
2400 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2401
2402 // we only ever need to generate a dmb ishst between an object put
2403 // and the associated card mark when we are using CMS without
2404 // conditional card marking
2405
2406 if (!UseConcMarkSweepGC || UseCondCardMark)
2407 return true;
2408
2409 // if we are implementing volatile puts using barriers then the
2410 // object put is translated as an str so we must insert the dmb ishst
2411
2412 if (UseBarriersForVolatile)
2413 return false;
2414
2415 // we can omit the dmb ishst if this StoreCM is part of a volatile
2416 // put because in that case the put will be implemented by stlr
2417 //
2418 // we need to check for a normal subgraph feeding this StoreCM.
2419 // that means the StoreCM must be fed Memory from a leading membar,
2420 // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2421 // leading membar must be part of a normal subgraph
2422
2423 Node *x = storecm->in(StoreNode::Memory);
2424
2425 if (!x->is_Proj())
2426 return false;
2427
2428 x = x->in(0);
2429
2430 if (!x->is_MemBar())
2431 return false;
2432
2433 MemBarNode *leading = x->as_MemBar();
2434
2435 // reject invalid candidates
2436 if (!leading_membar(leading))
2437 return false;
2438
2439 // we can omit the StoreStore if it is the head of a normal subgraph
2440 return (leading_to_normal(leading) != NULL);
2441 }
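
// n.b. the sketch below is purely illustrative -- the rule, operand
// and encoding names are assumptions and may not match the StoreCM
// rules defined elsewhere in this file -- but it shows how this
// predicate is intended to gate the two translations of StoreCM, one
// with and one without the preceding dmb ishst:
//
//   instruct storeCM_unordered(immI0 zero, memory mem)
//   %{
//     match(Set mem (StoreCM mem zero));
//     predicate(unnecessary_storestore(n));
//     format %{ "strb zr, $mem\t# byte (card mark)" %}
//     ins_encode(aarch64_enc_strb0(mem));
//     ins_pipe(istore_mem);
//   %}
//
//   instruct storeCM_ordered(immI0 zero, memory mem)
//   %{
//     match(Set mem (StoreCM mem zero));
//     predicate(!unnecessary_storestore(n));
//     format %{ "dmb ishst\n\tstrb zr, $mem\t# byte (card mark)" %}
//     ins_encode(aarch64_enc_strb0_ordered(mem));
//     ins_pipe(istore_mem);
//   %}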
2442
2443
2444 #define __ _masm.
2445
2446 // forward declarations for helper functions to convert register
2447 // indices to register objects
2448
2449 // the ad file has to provide implementations of certain methods
2450 // expected by the generic code
2451 //
2452 // REQUIRED FUNCTIONALITY
2453
2454 //=============================================================================
2455
2456 // !!!!! Special hack to get all types of calls to specify the byte offset
2457 // from the start of the call to the point where the return address
8348 // comment storeIConditional was not used anywhere by AArch64.
8349 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8350 %{
8351 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8352
8353 ins_cost(VOLATILE_REF_COST);
8354
8355 format %{
8356 "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8357 "cmpw rscratch1, zr\t# EQ on successful write"
8358 %}
8359
8360 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8361
8362 ins_pipe(pipe_slow);
8363 %}
8364
8365 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8366 // can't match them
8367
8368 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8369
8370 match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8371
8372 effect(KILL cr);
8373
8374 format %{
8375 "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8376 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8377 %}
8378
8379 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8380 aarch64_enc_cset_eq(res));
8381
8382 ins_pipe(pipe_slow);
8383 %}
8384
8385 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8386
8387 match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8388
8389 effect(KILL cr);
8390
8391 format %{
8392 "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8393 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8394 %}
8395
8396 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8397 aarch64_enc_cset_eq(res));
8398
8399 ins_pipe(pipe_slow);
8400 %}
8401
8402 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8403
8404 match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8405
8406 effect(KILL cr);
8407
8408 format %{
8409 "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8410 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8411 %}
8412
8413 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8414 aarch64_enc_cset_eq(res));
8415
8416 ins_pipe(pipe_slow);
8417 %}
8418
8419 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8420
8421 match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8422
8423 effect(KILL cr);
8424
8425 format %{
8426 "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8427 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8428 %}
8429
8430 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8431 aarch64_enc_cset_eq(res));
8432
8433 ins_pipe(pipe_slow);
8434 %}
8435
8436
8437 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
8438 match(Set prev (GetAndSetI mem newv));
8439 format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
8440 ins_encode %{
8441 __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8442 %}
8443 ins_pipe(pipe_serial);
8444 %}
8445
8446 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
8447 match(Set prev (GetAndSetL mem newv));
8448 format %{ "atomic_xchg $prev, $newv, [$mem]" %}
8449 ins_encode %{
8450 __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
1022 static int emit_exception_handler(CodeBuffer &cbuf);
1023 static int emit_deopt_handler(CodeBuffer& cbuf);
1024
1025 static uint size_exception_handler() {
1026 return MacroAssembler::far_branch_size();
1027 }
1028
1029 static uint size_deopt_handler() {
1030 // count one adr and one far branch instruction
1031 return 4 * NativeInstruction::instruction_size;
1032 }
1033 };
1034
1035 // graph traversal helpers
1036
1037 MemBarNode *parent_membar(const Node *n);
1038 MemBarNode *child_membar(const MemBarNode *n);
1039 bool leading_membar(const MemBarNode *barrier);
1040
1041 bool is_card_mark_membar(const MemBarNode *barrier);
1042 bool is_CAS(int opcode);
1043
1044 MemBarNode *leading_to_normal(MemBarNode *leading);
1045 MemBarNode *normal_to_leading(const MemBarNode *barrier);
1046 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1047 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1048 MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1049
1050 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1051
1052 bool unnecessary_acquire(const Node *barrier);
1053 bool needs_acquiring_load(const Node *load);
1054
1055 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1056
1057 bool unnecessary_release(const Node *barrier);
1058 bool unnecessary_volatile(const Node *barrier);
1059 bool needs_releasing_store(const Node *store);
1060
1061 // predicate controlling translation of CompareAndSwapX
1062 bool needs_acquiring_load_exclusive(const Node *load);
1063
1064 // predicate controlling translation of StoreCM
1065 bool unnecessary_storestore(const Node *storecm);
1066 %}
1067
1068 source %{
1069
1070 // Optimization of volatile gets and puts
1071 // -------------------------------------
1072 //
1073 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1074 // use to implement volatile reads and writes. For a volatile read
1075 // we simply need
1076 //
1077 // ldar<x>
1078 //
1079 // and for a volatile write we need
1080 //
1081 // stlr<x>
1082 //
1083 // Alternatively, we can implement them by pairing a normal
1084 // load/store with a memory barrier. For a volatile read we need
1085 //
1086 // ldr<x>
1087 // dmb ishld
1088 //
1089 // for a volatile write
1090 //
1091 // dmb ish
1092 // str<x>
1093 // dmb ish
1094 //
1095 // We can also use ldaxr and stlxr to implement compare and swap (CAS)
1096 // sequences. These are normally translated to an instruction
1097 // sequence like the following
1098 //
1099 // dmb ish
1100 // retry:
1101 // ldxr<x> rval raddr
1102 // cmp rval rold
1103 // b.ne done
1104 // stlxr<x> rval, rnew, raddr
1105 // cbnz rval retry
1106 // done:
1107 // cset r0, eq
1108 // dmb ishld
1109 //
1110 // Note that the exclusive store is already using an stlxr
1111 // instruction. That is required to ensure visibility to other
1112 // threads of the exclusive write (assuming it succeeds) before that
1113 // of any subsequent writes.
1114 //
1115 // The following instruction sequence is an improvement on the above
1116 //
1117 // retry:
1118 // ldaxr<x> rval raddr
1119 // cmp rval rold
1120 // b.ne done
1121 // stlxr<x> rval, rnew, raddr
1122 // cbnz rval retry
1123 // done:
1124 // cset r0, eq
1125 //
1126 // We don't need the leading dmb ish since the stlxr guarantees
1127 // visibility of prior writes in the case that the swap is
1128 // successful. Crucially we don't have to worry about the case where
1129 // the swap is not successful since no valid program should be
1130 // relying on visibility of prior changes by the attempting thread
1131 // in the case where the CAS fails.
1132 //
1133 // Similarly, we don't need the trailing dmb ishld if we substitute
1134 // an ldaxr instruction since that will provide all the guarantees we
1135 // require regarding observation of changes made by other threads
1136 // before any change to the CAS address observed by the load.
1137 //
1138 // In order to generate the desired instruction sequence we need to
1139 // be able to identify specific 'signature' ideal graph node
1140 // sequences which i) occur as a translation of volatile reads,
1141 // writes or CAS operations and ii) do not occur through any other
1142 // translation or graph transformation. We can then provide
1143 // alternative adlc matching rules which translate these node
1144 // sequences to the desired machine code sequences. Selection of the
1145 // alternative rules can be implemented by predicates which identify
1146 // the relevant node sequences.
1147 //
1148 // The ideal graph generator translates a volatile read to the node
1149 // sequence
1150 //
1151 // LoadX[mo_acquire]
1152 // MemBarAcquire
1153 //
1154 // As a special case when using the compressed oops optimization we
1155 // may also see this variant
1156 //
1157 // LoadN[mo_acquire]
1158 // DecodeN
1159 // MemBarAcquire
1160 //
1161 // A volatile write is translated to the node sequence
1162 //
1163 // MemBarRelease
1164 // StoreX[mo_release] {CardMark}-optional
1165 // MemBarVolatile
1166 //
1193 // predicates need to detect its presence in order to correctly
1194 // select the desired adlc rules.
1195 //
1196 // Inlined unsafe volatile gets manifest as a somewhat different
1197 // node sequence to a normal volatile get
1198 //
1199 // MemBarCPUOrder
1200 // || \\
1201 // MemBarAcquire LoadX[mo_acquire]
1202 // ||
1203 // MemBarCPUOrder
1204 //
1205 // In this case the acquire membar does not directly depend on the
1206 // load. However, we can be sure that the load is generated from an
1207 // inlined unsafe volatile get if we see it dependent on this unique
1208 // sequence of membar nodes. Similarly, given an acquire membar we
1209 // can know that it was added because of an inlined unsafe volatile
1210 // get if it is fed and feeds a cpuorder membar and if its feed
1211 // membar also feeds an acquiring load.
1212 //
1213 // Finally an inlined (Unsafe) CAS operation is translated to the
1214 // following ideal graph
1215 //
1216 // MemBarRelease
1217 // MemBarCPUOrder
1218 // CompareAndSwapX {CardMark}-optional
1219 // MemBarCPUOrder
1220 // MemBarAcquire
1221 //
1222 // So, where we can identify these volatile read and write
1223 // signatures we can choose to plant either of the above two code
1224 // sequences. For a volatile read we can simply plant a normal
1225 // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1226 // also choose to inhibit translation of the MemBarAcquire and
1227 // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1228 //
1229 // When we recognise a volatile store signature we can choose to
1230 // plant a dmb ish as a translation for the MemBarRelease, a
1231 // normal str<x> and then a dmb ish for the MemBarVolatile.
1232 // Alternatively, we can inhibit translation of the MemBarRelease
1233 // and MemBarVolatile and instead plant a simple stlr<x>
1234 // instruction.
1235 //
1236 // when we recognise a CAS signature we can choose to plant a dmb
1237 // ish as a translation for the MemBarRelease, the conventional
1238 // macro-instruction sequence for the CompareAndSwap node (which
1239 // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1240 // Alternatively, we can elide generation of the dmb instructions
1241 // and plant the alternative CompareAndSwap macro-instruction
1242 // sequence (which uses ldaxr<x>).
1243 //
1244 // Of course, the above only applies when we see these signature
1245 // configurations. We still want to plant dmb instructions in any
1246 // other cases where we may see a MemBarAcquire, MemBarRelease or
1247 // MemBarVolatile. For example, at the end of a constructor which
1248 // writes final/volatile fields we will see a MemBarRelease
1249 // instruction and this needs a 'dmb ish' lest we risk the
1250 // constructed object being visible without making the
1251 // final/volatile field writes visible.
1252 //
1253 // n.b. the translation rules below which rely on detection of the
1254 // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1255 // If we see anything other than the signature configurations we
1256 // always just translate the loads and stores to ldr<x> and str<x>
1257 // and translate acquire, release and volatile membars to the
1258 // relevant dmb instructions.
1259 //
1260
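// To make the CAS case concrete: relative to the plain compareAndSwapI
// rule shown earlier in this listing (which uses the
// aarch64_enc_cmpxchgw encoding), an acquiring variant gated on
// needs_acquiring_load_exclusive might look like the sketch below. The
// rule name and the _acq encoding name are illustrative assumptions;
// the _acq spelling follows the aarch64_enc_cmpxchgw_acq encoding used
// by storeIConditional earlier.
//
//   instruct compareAndSwapIAcq(iRegINoSp res, indirect mem,
//                               iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
//   %{
//     predicate(needs_acquiring_load_exclusive(n));
//     match(Set res (CompareAndSwapI mem (Binary oldval newval)));
//     ins_cost(VOLATILE_REF_COST);
//     effect(KILL cr);
//     format %{
//       "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
//       "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
//     %}
//     ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
//                aarch64_enc_cset_eq(res));
//     ins_pipe(pipe_slow);
//   %}
//
// with the leading dmb ish and trailing dmb ishld then suppressed via
// the unnecessary_* membar predicates, as described above.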
1261 // graph traversal helpers used for volatile put/get and CAS
1262 // optimization
1263
1264 // 1) general purpose helpers
1265
1266 // if node n is linked to a parent MemBarNode by an intervening
1267 // Control and Memory ProjNode return the MemBarNode otherwise return
1268 // NULL.
1269 //
1270 // n may only be a Load or a MemBar.
1271
1272 MemBarNode *parent_membar(const Node *n)
1273 {
1274 Node *ctl = NULL;
1275 Node *mem = NULL;
1276 Node *membar = NULL;
1277
1278 if (n->is_Load()) {
1279 ctl = n->lookup(LoadNode::Control);
1280 mem = n->lookup(LoadNode::Memory);
1281 } else if (n->is_MemBar()) {
1282 ctl = n->lookup(TypeFunc::Control);
1283 mem = n->lookup(TypeFunc::Memory);
1284 } else {
1285 return NULL;
1286 }
1287
1288 if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1289 return NULL;
1290 }
1291
1292 membar = ctl->lookup(0);
1293
1294 if (!membar || !membar->is_MemBar()) {
1295 return NULL;
1296 }
1297
1298 if (mem->lookup(0) != membar) {
1299 return NULL;
1300 }
1301
1302 return membar->as_MemBar();
1303 }
1304
1305 // if n is linked to a child MemBarNode by intervening Control and
1306 // Memory ProjNodes return the MemBarNode otherwise return NULL.
1307
1308 MemBarNode *child_membar(const MemBarNode *n)
1309 {
1310 ProjNode *ctl = n->proj_out(TypeFunc::Control);
1311 ProjNode *mem = n->proj_out(TypeFunc::Memory);
1312
1313 // MemBar needs to have both a Ctl and Mem projection
1314 if (! ctl || ! mem)
1315 return NULL;
1316
1317 MemBarNode *child = NULL;
1318 Node *x;
1319
1320 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1321 x = ctl->fast_out(i);
1322 // if we see a membar we keep hold of it. we may also see a new
1323 // arena copy of the original but it will appear later
1324 if (x->is_MemBar()) {
1325 child = x->as_MemBar();
1326 break;
1327 }
1328 }
1329
1330 if (child == NULL) {
1331 return NULL;
1332 }
1333
1334 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1335 x = mem->fast_out(i);
1336 // if we see the membar we found on the Ctl path also fed by this
1337 // Mem projection then we are done
1338 if (x == child) {
1339 return child;
1340 }
1341 }
1342 return NULL;
1343 }
1344
1345 // helper predicate used to filter candidates for a leading memory
1346 // barrier
1347 //
1348 // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1349 // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1350
1351 bool leading_membar(const MemBarNode *barrier)
1352 {
1353 int opcode = barrier->Opcode();
1354 // if this is a release membar we are ok
1355 if (opcode == Op_MemBarRelease) {
1356 return true;
1357 }
1358 // if its a cpuorder membar . . .
1359 if (opcode != Op_MemBarCPUOrder) {
1360 return false;
1361 }
1362 // then the parent has to be a release membar
1363 MemBarNode *parent = parent_membar(barrier);
1364 if (!parent) {
1365 return false;
1366 }
1367 opcode = parent->Opcode();
1368 return opcode == Op_MemBarRelease;
1369 }
1370
1371 // 2) card mark detection helper
1372
1373 // helper predicate which can be used to detect a volatile membar
1374 // introduced as part of a conditional card mark sequence either by
1375 // G1 or by CMS when UseCondCardMark is true.
1376 //
1377 // membar can be definitively determined to be part of a card mark
1378 // sequence if and only if all the following hold
1379 //
1380 // i) it is a MemBarVolatile
1381 //
1382 // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1383 // true
1384 //
1385 // iii) the node's Mem projection feeds a StoreCM node.
1386
1387 bool is_card_mark_membar(const MemBarNode *barrier)
1388 {
1389 if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1390 return false;
1391 }
1392
1393 if (barrier->Opcode() != Op_MemBarVolatile) {
1394 return false;
1395 }
1396
1397 ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1398
1399 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1400 Node *y = mem->fast_out(i);
1401 if (y->Opcode() == Op_StoreCM) {
1402 return true;
1403 }
1404 }
1405
1406 return false;
1407 }
1408
1409
1410 // 3) helper predicates to traverse volatile put or CAS graphs which
1411 // may contain GC barrier subgraphs
1412
1413 // Preamble
1414 // --------
1415 //
1416 // for volatile writes we can omit generating barriers and employ a
1417 // releasing store when we see a node sequence with a
1418 // leading MemBarRelease and a trailing MemBarVolatile as follows
1419 //
1420 // MemBarRelease
1421 // { || } -- optional
1422 // {MemBarCPUOrder}
1423 // || \\
1424 // || StoreX[mo_release]
1425 // | \ /
1426 // | MergeMem
1427 // | /
1428 // MemBarVolatile
1429 //
1430 // where
1431 // || and \\ represent Ctl and Mem feeds via Proj nodes
1461 // ordering is required for both non-volatile and volatile
1462 // puts. Normally that means we need to translate a StoreCM using
1463 // the sequence
1464 //
1465 // dmb ishst
1466 // stlrb
1467 //
1468 // However, in the case of a volatile put if we can recognise this
1469 // configuration and plant an stlr for the object write then we can
1470 // omit the dmb and just plant an strb since visibility of the stlr
1471 // is ordered before visibility of subsequent stores. StoreCM nodes
1472 // also arise when using G1 or using CMS with conditional card
1473 // marking. In these cases (as we shall see) we don't need to insert
1474 // the dmb when translating StoreCM because there is already an
1475 // intervening StoreLoad barrier between it and the StoreP/N.
1476 //
1477 // It is also possible to perform the card mark conditionally on it
1478 // currently being unmarked in which case the volatile put graph
1479 // will look slightly different
1480 //
1481 // MemBarRelease____________________________________________
1482 // || \\ Ctl \ Ctl \ \\ Mem \
1483 // || StoreN/P[mo_release] CastP2X If LoadB |
1484 // | \ / \ |
1485 // | MergeMem . . . StoreB
1486 // | / /
1487 // || /
1488 // MemBarVolatile
1489 //
1490 // It is worth noting at this stage that both the above
1491 // configurations can be uniquely identified by checking that the
1492 // memory flow includes the following subgraph:
1493 //
1494 // MemBarRelease
1495 // {MemBarCPUOrder}
1496 // | \ . . .
1497 // | StoreX[mo_release] . . .
1498 // | /
1499 // MergeMem
1500 // |
1501 // MemBarVolatile
1502 //
1503 // This is referred to as a *normal* subgraph. It can easily be
1504 // detected starting from any candidate MemBarRelease,
1505 // StoreX[mo_release] or MemBarVolatile.
1506 //
1507 // A simple variation on this normal case occurs for an unsafe CAS
1508 // operation. The basic graph for a non-object CAS is
1509 //
1510 // MemBarRelease
1511 // ||
1512 // MemBarCPUOrder
1513 // || \\ . . .
1514 // || CompareAndSwapX
1515 // || |
1516 // || SCMemProj
1517 // | \ /
1518 // | MergeMem
1519 // | /
1520 // MemBarCPUOrder
1521 // ||
1522 // MemBarAcquire
1523 //
1524 // The same basic variations on this arrangement (mutatis mutandis)
1525 // occur when a card mark is introduced. i.e. we see the same basic
1526 // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527 // tail of the graph is a pair comprising a MemBarCPUOrder +
1528 // MemBarAcquire.
1529 //
1530 // So, in the case of a CAS the normal graph has the variant form
1531 //
1532 // MemBarRelease
1533 // MemBarCPUOrder
1534 // | \ . . .
1535 // | CompareAndSwapX . . .
1536 // | |
1537 // | SCMemProj
1538 // | / . . .
1539 // MergeMem
1540 // |
1541 // MemBarCPUOrder
1542 // MemBarAcquire
1543 //
1544 // This graph can also easily be detected starting from any
1545 // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546 //
1547 // the code below uses two helper predicates, leading_to_normal and
1548 // normal_to_leading to identify these normal graphs, one validating
1549 // the layout starting from the top membar and searching down and
1550 // the other validating the layout starting from the lower membar
1551 // and searching up.
1552 //
1553 // There are two special case GC configurations when a normal graph
1554 // may not be generated: when using G1 (which always employs a
1555 // conditional card mark); and when using CMS with conditional card
1556 // marking configured. These GCs are both concurrent rather than
1557 // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1558 // graph between the leading and trailing membar nodes, in
1559 // particular enforcing stronger memory serialisation between the
1560 // object put and the corresponding conditional card mark. CMS
1561 // employs a post-write GC barrier while G1 employs both a pre- and
1562 // post-write GC barrier. Of course the extra nodes may be absent --
1563 // they are only inserted for object puts. This significantly
1564 // complicates the task of identifying whether a MemBarRelease,
1565 // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566 // when using these GC configurations (see below). It adds similar
1567 // complexity to the task of identifying whether a MemBarRelease,
1568 // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569 //
1570 // In both cases the post-write subtree includes an auxiliary
1571 // MemBarVolatile (StoreLoad barrier) separating the object put and
1572 // the read of the corresponding card. This poses two additional
1573 // problems.
1574 //
1575 // Firstly, a card mark MemBarVolatile needs to be distinguished
1576 // from a normal trailing MemBarVolatile. Resolving this first
1577 // problem is straightforward: a card mark MemBarVolatile always
1578 // projects a Mem feed to a StoreCM node and that is a unique marker
1579 //
1580 // MemBarVolatile (card mark)
1581 // C | \ . . .
1582 // | StoreCM . . .
1583 // . . .
1584 //
1585 // The second problem is how the code generator is to translate the
1586 // card mark barrier. It always needs to be translated to a "dmb
1587 // ish" instruction whether or not it occurs as part of a volatile
1588 // put. A StoreLoad barrier is needed after the object put to ensure
1589 // i) visibility to GC threads of the object put and ii) visibility
1590 // to the mutator thread of any card clearing write by a GC
1591 // thread. Clearly a normal store (str) will not guarantee this
1592 // ordering but neither will a releasing store (stlr). The latter
1593 // guarantees that the object put is visible but does not guarantee
1594 // that writes by other threads have also been observed.
1595 //
1596 // So, returning to the task of translating the object put and the
1597 // leading/trailing membar nodes: what do the non-normal node graphs
1598 // look like for these 2 special cases? and how can we determine the
1599 // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600 // in both normal and non-normal cases?
1601 //
1602 // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1603 // which selects conditional execution based on the value loaded
1604 // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605 // intervening StoreLoad barrier (MemBarVolatile).
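//
// For illustration only (hypothetical helper names, not HotSpot's): a
// standalone C++ sketch of the store-time logic those nodes implement
// for CMS with UseCondCardMark.
//
//   void storeload_barrier();                     // hypothetical: emits dmb ish
//
//   void cond_card_mark(char *card_table, unsigned long field_adr,
//                       char dirty_val) {
//     storeload_barrier();                        // card mark MemBarVolatile
//     char *card = &card_table[field_adr >> 9];   // 512-byte cards, as in HotSpot
//     if (*card != dirty_val) {                   // LoadB + Cmp + If
//       *card = dirty_val;                        // StoreCM
//     }
//   }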
1606 //
1607 // So, with CMS we may see a node graph for a volatile object store
1608 // which looks like this
1609 //
1610 // MemBarRelease
1611 // MemBarCPUOrder_(leading)__________________
1612 // C | M \ \\ C \
1613 // | \ StoreN/P[mo_release] CastP2X
1614 // | Bot \ /
1615 // | MergeMem
1616 // | /
1617 // MemBarVolatile (card mark)
1618 // C | || M |
1619 // | LoadB |
1620 // | | |
1621 // | Cmp |\
1622 // | / | \
1623 // If | \
1624 // | \ | \
1625 // IfFalse IfTrue | \
1626 // \ / \ | \
1627 // \ / StoreCM |
1628 // \ / | |
1629 // Region . . . |
1630 // | \ /
1631 // | . . . \ / Bot
1632 // | MergeMem
1633 // | |
1634 // MemBarVolatile (trailing)
1635 //
1636 // The first MergeMem merges the AliasIdxBot Mem slice from the
1637 // leading membar and the oopptr Mem slice from the Store into the
1638 // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639 // Mem slice from the card mark membar and the AliasIdxRaw slice
1640 // from the StoreCM into the trailing membar (n.b. the latter
1641 // proceeds via a Phi associated with the If region).
1642 //
1643 // The graph for a CAS varies slightly, the obvious difference being
1644 // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645 // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646 // MemBarAcquire pair. The other important difference is that the
1647 // CompareAndSwap node's SCMemProj is not merged into the card mark
1648 // membar - it still feeds the trailing MergeMem. This also means
1649 // that the card mark membar receives its Mem feed directly from the
1650 // leading membar rather than via a MergeMem.
1651 //
1652 // MemBarRelease
1653 // MemBarCPUOrder__(leading)_________________________
1654 // || \\ C \
1655 // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X
1656 // C | || M | |
1657 // | LoadB | ______/|
1658 // | | | / |
1659 // | Cmp | / SCMemProj
1660 // | / | / |
1661 // If | / /
1662 // | \ | / /
1663 // IfFalse IfTrue | / /
1664 // \ / \ |/ prec /
1665 // \ / StoreCM /
1666 // \ / | /
1667 // Region . . . /
1668 // | \ /
1669 // | . . . \ / Bot
1670 // | MergeMem
1671 // | |
1672 // MemBarCPUOrder
1673 // MemBarAcquire (trailing)
1674 //
1675 // This has a slightly different memory subgraph to the one seen
1676 // previously but the core of it is the same as for the CAS normal
1677 // subgraph
1678 //
1679 // MemBarRelease
1680 // MemBarCPUOrder____
1681 // || \ . . .
1682 // MemBarVolatile CompareAndSwapX . . .
1683 // | \ |
1684 // . . . SCMemProj
1685 // | / . . .
1686 // MergeMem
1687 // |
1688 // MemBarCPUOrder
1689 // MemBarAcquire
1690 //
1691 //
1692 // G1 is quite a lot more complicated. The nodes inserted on behalf
1693 // of G1 may comprise: a pre-write graph which adds the old value to
1694 // the SATB queue; the releasing store itself; and, finally, a
1695 // post-write graph which performs a card mark.
1696 //
1697 // The pre-write graph may be omitted, but only when the put is
1698 // writing to a newly allocated (young gen) object and then only if
1699 // there is a direct memory chain to the Initialize node for the
1700 // object allocation. This will not happen for a volatile put since
1701 // any memory chain passes through the leading membar.
1702 //
1703 // The pre-write graph includes a series of 3 If tests. The outermost
1704 // If tests whether SATB is enabled (no else case). The next If tests
1705 // whether the old value is non-NULL (no else case). The third tests
1706 // whether the SATB queue index is > 0, if so updating the queue. The
1707 // else case for this third If calls out to the runtime to allocate a
1708 // new queue buffer.
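//
// For illustration only (hypothetical names, not HotSpot's): a
// standalone C++ sketch of the SATB pre-write barrier shape described
// above.
//
//   #include <cstddef>
//
//   typedef void *oop;                          // illustrative stand-in for HotSpot's oop
//   extern bool   satb_marking_active;          // per-thread SATB active flag
//   extern size_t satb_index;                   // queue index, counted in bytes
//   extern oop   *satb_buffer;                  // current SATB queue buffer
//   void runtime_satb_enqueue(oop pre_val);     // hypothetical slow-path runtime call
//
//   void g1_pre_write(oop *field) {
//     if (!satb_marking_active) return;         // outer If: SATB enabled?
//     oop pre_val = *field;                     // load of the old value
//     if (pre_val == NULL) return;              // second If: old value non-NULL?
//     if (satb_index > 0) {                     // third If: room left in the queue?
//       satb_index -= sizeof(oop);
//       satb_buffer[satb_index / sizeof(oop)] = pre_val;
//     } else {
//       runtime_satb_enqueue(pre_val);          // else case: allocate a new buffer
//     }
//   }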
1709 //
1710 // So with G1 the pre-write and releasing store subgraph looks like
1711 // this (the nested Ifs are omitted).
1723 // | \ |
1724 // | . . . \ |
1725 // | / | / | |
1726 // Region Phi[M] | |
1727 // | \ | | |
1728 // | \_____ | ___ | |
1729 // C | C \ | C \ M | |
1730 // | CastP2X | StoreN/P[mo_release] |
1731 // | | | |
1732 // C | M | M | M |
1733 // \ | | /
1734 // . . .
1735 // (post write subtree elided)
1736 // . . .
1737 // C \ M /
1738 // MemBarVolatile (trailing)
1739 //
1740 // n.b. the LoadB in this subgraph is not the card read -- it's a
1741 // read of the SATB queue active flag.
1742 //
1743 // Once again the CAS graph is a minor variant on the above with the
1744 // expected substitutions of CompareAndSwapX for StoreN/P and
1745 // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746 //
1747 // The G1 post-write subtree is also optional, this time when the
1748 // new value being written is either null or can be identified as a
1749 // newly allocated (young gen) object with no intervening control
1750 // flow. The latter cannot happen but the former may, in which case
1751 // the card mark membar is omitted and the memory feeds from the
1752 // leading membar and the StoreN/P are merged directly into the
1753 // trailing membar as per the normal subgraph. So, the only special
1754 // case which arises is when the post-write subgraph is generated.
1755 //
1756 // The kernel of the post-write G1 subgraph is the card mark itself
1757 // which includes a card mark memory barrier (MemBarVolatile), a
1758 // card test (LoadB), and a conditional update (If feeding a
1759 // StoreCM). These nodes are surrounded by a series of nested Ifs
1760 // which try to avoid doing the card mark. The top level If skips if
1761 // the object reference does not cross regions (i.e. it tests if
1762 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763 // need not be recorded. The next If, which skips on a NULL value,
1764 // may be absent (it is not generated if the type of value is >=
1765 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766 // checking if card_val != young). n.b. although this test requires
1767 // a pre-read of the card it can safely be done before the StoreLoad
1768 // barrier. However that does not bypass the need to reread the card
1769 // after the barrier.
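//
// For illustration only (hypothetical names, not HotSpot's): a
// standalone C++ sketch of the card marking logic those nested Ifs
// implement.
//
//   void storeload_barrier();                         // hypothetical: emits dmb ish
//   void enqueue_card(char *card);                    // hypothetical: card queue store
//
//   void g1_post_write(char *card_table, unsigned long adr, unsigned long val,
//                      int region_shift, char young_val, char dirty_val) {
//     if (((adr ^ val) >> region_shift) == 0) return; // intra-region ref: skip
//     if (val == 0) return;                           // NULL value: skip (may be elided)
//     char *card = &card_table[adr >> 9];             // 512-byte cards, as in HotSpot
//     if (*card == young_val) return;                 // young region: skip
//     storeload_barrier();                            // card mark MemBarVolatile
//     if (*card != dirty_val) {                       // re-read the card after the barrier
//       *card = dirty_val;                            // StoreCM
//       enqueue_card(card);                           // card mark queue store
//     }
//   }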
1770 //
1771 // (pre-write subtree elided)
1772 // . . . . . . . . . . . .
1820 // \ MergeMem
1821 // \ /
1822 // MemBarVolatile
1823 //
1824 // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825 // from the leading membar and the oopptr Mem slice from the Store
1826 // into the card mark membar i.e. the memory flow to the card mark
1827 // membar still looks like a normal graph.
1828 //
1829 // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830 // Mem slices (from the StoreCM and other card mark queue stores).
1831 // However in this case the AliasIdxBot Mem slice does not come
1832 // direct from the card mark membar. It is merged through a series
1833 // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834 // from the leading membar with the Mem feed from the card mark
1835 // membar. Each Phi corresponds to one of the Ifs which may skip
1836 // around the card mark membar. So when the If implementing the NULL
1837 // value check has been elided the total number of Phis is 2
1838 // otherwise it is 3.
1839 //
1840 // The CAS graph when using G1GC also includes a pre-write subgraph
1841 // and an optional post-write subgraph. The same variations are
1842 // introduced as for CMS with conditional card marking i.e. the
1843 // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844 // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845 // Mem feed from the CompareAndSwapP/N includes a precedence
1846 // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847 // trailing membar. So, as before the configuration includes the
1848 // normal CAS graph as a subgraph of the memory flow.
1849 //
1850 // So, the upshot is that in all cases the volatile put graph will
1851 // include a *normal* memory subgraph between the leading membar and
1852 // its child membar, either a volatile put graph (including a
1853 // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854 // When that child is not a card mark membar then it marks the end
1855 // of the volatile put or CAS subgraph. If the child is a card mark
1856 // membar then the normal subgraph will form part of a volatile put
1857 // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858 // to a trailing barrier via a MergeMem. That feed is either direct
1859 // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860 // memory flow (for G1).
1861 //
1862 // The predicates controlling generation of instructions for store
1863 // and barrier nodes employ a few simple helper functions (described
1864 // below) which identify the presence or absence of all these
1865 // subgraph configurations and provide a means of traversing from
1866 // one node in the subgraph to another.
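//
// For illustration only: a simplified sketch of how a store-side
// predicate composes these helpers (it mirrors the tail of
// needs_releasing_store and unnecessary_release below; the function
// name is hypothetical).
//
//   bool leads_volatile_put(MemBarNode *leading) {
//     MemBarNode *child = leading_to_normal(leading);
//     if (child == NULL) {
//       return false;                   // no normal subgraph below the leading membar
//     }
//     if (!is_card_mark_membar(child)) {
//       return true;                    // child is the trailing membar: done
//     }
//     // a card mark membar must itself feed a trailing membar
//     return card_mark_to_trailing(child) != NULL;
//   }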
1867
1868 // is_CAS(int opcode)
1869 //
1870 // return true if opcode is one of the possible CompareAndSwapX
1871 // values otherwise false.
1872
1873 bool is_CAS(int opcode)
1874 {
1875 return (opcode == Op_CompareAndSwapI ||
1876 opcode == Op_CompareAndSwapL ||
1877 opcode == Op_CompareAndSwapN ||
1878 opcode == Op_CompareAndSwapP);
1879 }
1880
1881 // leading_to_normal
1882 //
1883 // graph traversal helper which detects the normal case Mem feed from
1884 // a release membar (or, optionally, its cpuorder child) to a
1885 // dependent volatile membar i.e. it ensures that one or other of
1886 // the following Mem flow subgraphs is present.
1887 //
1888 // MemBarRelease
1889 // MemBarCPUOrder {leading}
1890 // | \ . . .
1891 // | StoreN/P[mo_release] . . .
1892 // | /
1893 // MergeMem
1894 // |
1895 // MemBarVolatile {trailing or card mark}
1896 //
1897 // MemBarRelease
1898 // MemBarCPUOrder {leading}
1899 // | \ . . .
1900 // | CompareAndSwapX . . .
1901 // |
1902 // . . . SCMemProj
1903 // \ |
1904 // | MergeMem
1905 // | /
1906 // MemBarCPUOrder
1907 // MemBarAcquire {trailing}
1908 //
1909 // if the correct configuration is present returns the trailing
1910 // membar otherwise NULL.
1911 //
1912 // the input membar is expected to be either a cpuorder membar or a
1913 // release membar. in the latter case it should not have a cpuorder membar
1914 // child.
1915 //
1916 // the returned value may be a card mark or trailing membar
1917 //
1918
1919 MemBarNode *leading_to_normal(MemBarNode *leading)
1920 {
1921 assert((leading->Opcode() == Op_MemBarRelease ||
1922 leading->Opcode() == Op_MemBarCPUOrder),
1923 "expecting a volatile or cpuroder membar!");
1924
1925 // check the mem flow
1926 ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1927
1928 if (!mem) {
1929 return NULL;
1930 }
1931
1932 Node *x = NULL;
1933 StoreNode * st = NULL;
1934 LoadStoreNode *cas = NULL;
1935 MergeMemNode *mm = NULL;
1936
1937 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1938 x = mem->fast_out(i);
1939 if (x->is_MergeMem()) {
1940 // two merge mems is one too many
1941 if (mm != NULL) {
1942 return NULL;
1943 }
1944 mm = x->as_MergeMem();
1945 } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1946 // two releasing stores/CAS nodes is one too many
1947 if (st != NULL || cas != NULL) {
1948 return NULL;
1949 }
1950 st = x->as_Store();
1951 } else if (is_CAS(x->Opcode())) {
1952 if (st != NULL || cas != NULL) {
1953 return NULL;
1954 }
1955 cas = x->as_LoadStore();
1956 }
1957 }
1958
1959 // must have a store or a cas
1960 if (!st && !cas) {
1961 return NULL;
1962 }
1963
1964 // must have a merge if we also have st
1965 if (st && !mm) {
1966 return NULL;
1967 }
1968
1969 Node *y = NULL;
1970 if (cas) {
1971 // look for an SCMemProj
1972 for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
1973 x = cas->fast_out(i);
1974 if (x->is_Proj()) {
1975 y = x;
1976 break;
1977 }
1978 }
1979 if (y == NULL) {
1980 return NULL;
1981 }
1982 // the proj must feed a MergeMem
1983 for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
1984 x = y->fast_out(i);
1985 if (x->is_MergeMem()) {
1986 mm = x->as_MergeMem();
1987 break;
1988 }
1989 }
1990 if (mm == NULL)
1991 return NULL;
1992 } else {
1993 // ensure the store feeds the existing mergemem;
1994 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1995 if (st->fast_out(i) == mm) {
1996 y = st;
1997 break;
1998 }
1999 }
2000 if (y == NULL) {
2001 return NULL;
2002 }
2003 }
2004
2005 MemBarNode *mbar = NULL;
2006 // ensure the merge feeds to the expected type of membar
2007 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2008 x = mm->fast_out(i);
2009 if (x->is_MemBar()) {
2010 int opcode = x->Opcode();
2011 if (opcode == Op_MemBarVolatile && st) {
2012 mbar = x->as_MemBar();
2013 } else if (cas && opcode == Op_MemBarCPUOrder) {
2014 MemBarNode *y = x->as_MemBar();
2015 y = child_membar(y);
2016 if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2017 mbar = y;
2018 }
2019 }
2020 break;
2021 }
2022 }
2023
2024 return mbar;
2025 }
2026
2027 // normal_to_leading
2028 //
2029 // graph traversal helper which detects the normal case Mem feed
2030 // from either a card mark or a trailing membar to a preceding
2031 // release membar (optionally its cpuorder child) i.e. it ensures
2032 // that one or other of the following Mem flow subgraphs is present.
2033 //
2034 // MemBarRelease
2035 // MemBarCPUOrder {leading}
2036 // | \ . . .
2037 // | StoreN/P[mo_release] . . .
2038 // | /
2039 // MergeMem
2040 // |
2041 // MemBarVolatile {card mark or trailing}
2042 //
2043 // MemBarRelease
2044 // MemBarCPUOrder {leading}
2045 // | \ . . .
2046 // | CompareAndSwapX . . .
2047 // |
2048 // . . . SCMemProj
2049 // \ |
2050 // | MergeMem
2051 // | /
2052 // MemBarCPUOrder
2053 // MemBarAcquire {trailing}
2054 //
2055 // this predicate checks for the same flow as the previous predicate
2056 // but starting from the bottom rather than the top.
2057 //
2058 // if the configuration is present returns the cpuorder membar for
2059 // preference or when absent the release membar otherwise NULL.
2060 //
2061 // n.b. the input membar is expected to be a MemBarVolatile but
2062 // need not be a card mark membar.
2063
2064 MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065 {
2066 // input must be a volatile membar
2067 assert((barrier->Opcode() == Op_MemBarVolatile ||
2068 barrier->Opcode() == Op_MemBarAcquire),
2069 "expecting a volatile or an acquire membar");
2070 Node *x;
2071 bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072
2073 // if we have an acquire membar then it must be fed via a CPUOrder
2074 // membar
2075
2076 if (is_cas) {
2077 // skip to parent barrier which must be a cpuorder
2078 x = parent_membar(barrier);
2079 if (x == NULL || x->Opcode() != Op_MemBarCPUOrder)
2080 return NULL;
2081 } else {
2082 // start from the supplied barrier
2083 x = (Node *)barrier;
2084 }
2085
2086 // the Mem feed to the membar should be a merge
2087 x = x->in(TypeFunc::Memory);
2088 if (!x->is_MergeMem())
2089 return NULL;
2090
2091 MergeMemNode *mm = x->as_MergeMem();
2092
2093 if (is_cas) {
2094 // the merge should be fed from the CAS via an SCMemProj node
2095 x = NULL;
2096 for (uint idx = 1; idx < mm->req(); idx++) {
2097 if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2098 x = mm->in(idx);
2099 break;
2100 }
2101 }
2102 if (x == NULL) {
2103 return NULL;
2104 }
2105 // check for a CAS feeding this proj
2106 x = x->in(0);
2107 int opcode = x->Opcode();
2108 if (!is_CAS(opcode)) {
2109 return NULL;
2110 }
2111 // the CAS should get its mem feed from the leading membar
2112 x = x->in(MemNode::Memory);
2113 } else {
2114 // the merge should get its Bottom mem feed from the leading membar
2115 x = mm->in(Compile::AliasIdxBot);
2116 }
2117
2118 // ensure this is a non control projection
2119 if (!x->is_Proj() || x->is_CFG()) {
2120 return NULL;
2121 }
2122 // if it is fed by a membar that's the one we want
2123 x = x->in(0);
2124
2125 if (!x->is_MemBar()) {
2126 return NULL;
2127 }
2128
2129 MemBarNode *leading = x->as_MemBar();
2130 // reject invalid candidates
2131 if (!leading_membar(leading)) {
2132 return NULL;
2133 }
2134
2135 // ok, we have a leading membar, now for the sanity clauses
2136
2137 // the leading membar must feed Mem to a releasing store or CAS
2138 ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2139 StoreNode *st = NULL;
2140 LoadStoreNode *cas = NULL;
2141 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2142 x = mem->fast_out(i);
2143 if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2144 // two stores or CASes is one too many
2145 if (st != NULL || cas != NULL) {
2146 return NULL;
2147 }
2148 st = x->as_Store();
2149 } else if (is_CAS(x->Opcode())) {
2150 if (st != NULL || cas != NULL) {
2151 return NULL;
2152 }
2153 cas = x->as_LoadStore();
2154 }
2155 }
2156
2157 // we must have found at least one of a releasing store or a CAS
2158 if (st == NULL && cas == NULL) {
2159 return NULL;
2160 }
2161
2162 if (st == NULL) {
2163 // nothing more to check
2164 return leading;
2165 } else {
2166 // we should not have a store if we started from an acquire
2167 if (is_cas) {
2168 return NULL;
2169 }
2170
2171 // the store should feed the merge we used to get here
2172 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173 if (st->fast_out(i) == mm) {
2174 return leading;
2175 }
2176 }
2177 }
2178
2179 return NULL;
2180 }
2181
2182 // card_mark_to_trailing
2183 //
2184 // graph traversal helper which detects extra, non-normal Mem feed
2185 // from a card mark volatile membar to a trailing membar i.e. it
2186 // ensures that one of the following three GC post-write Mem flow
2187 // subgraphs is present.
2188 //
2189 // 1)
2190 // . . .
2191 // |
2192 // MemBarVolatile (card mark)
2193 // | |
2194 // | StoreCM
2195 // | |
2196 // | . . .
2197 // Bot | /
2198 // MergeMem
2199 // |
2200 // |
2201 // MemBarVolatile {trailing}
2202 //
2203 // 2)
2204 // MemBarRelease/CPUOrder (leading)
2205 // |
2206 // |
2207 // |\ . . .
2208 // | \ |
2209 // | \ MemBarVolatile (card mark)
2210 // | \ | |
2211 // \ \ | StoreCM . . .
2212 // \ \ |
2213 // \ Phi
2214 // \ /
2215 // Phi . . .
2216 // Bot | /
2217 // MergeMem
2218 // |
2219 // MemBarVolatile {trailing}
2220 //
2221 //
2222 // 3)
2223 // MemBarRelease/CPUOrder (leading)
2224 // |
2225 // |\
2226 // | \
2227 // | \ . . .
2228 // | \ |
2229 // |\ \ MemBarVolatile (card mark)
2230 // | \ \ | |
2231 // | \ \ | StoreCM . . .
2232 // | \ \ |
2233 // \ \ Phi
2234 // \ \ /
2235 // \ Phi
2236 // \ /
2237 // Phi . . .
2238 // Bot | /
2239 // MergeMem
2240 // |
2241 // |
2242 // MemBarVolatile {trailing}
2243 //
2244 // configuration 1 is only valid if UseConcMarkSweepGC &&
2245 // UseCondCardMark
2246 //
2247 // configurations 2 and 3 are only valid if UseG1GC.
2248 //
2249 // if a valid configuration is present returns the trailing membar
2250 // otherwise NULL.
2251 //
2252 // n.b. the supplied membar is expected to be a card mark
2253 // MemBarVolatile i.e. the caller must ensure the input node has the
2254 // correct opcode and feeds Mem to a StoreCM node
2255
2256 MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257 {
2258 // input must be a card mark volatile membar
2259 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260
2261 Node *feed = barrier->proj_out(TypeFunc::Memory);
2262 Node *x;
2272 x = feed->fast_out(i);
2273 // look for a MergeMem fed by the current feed node
2274 if (x->is_MergeMem()) {
2275 mm = x->as_MergeMem();
2276 break;
2277 }
2278 }
2279 if (mm) {
2280 retry_feed = false;
2281 } else if (UseG1GC && phicount++ < MAX_PHIS) {
2282 // the barrier may feed indirectly via one or two Phi nodes
2283 PhiNode *phi = NULL;
2284 for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285 x = feed->fast_out(i);
2286 // the correct Phi will be merging a Bot memory slice
2287 if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288 phi = x->as_Phi();
2289 break;
2290 }
2291 }
2292 if (!phi) {
2293 return NULL;
2294 }
2295 // look for another merge below this phi
2296 feed = phi;
2297 } else {
2298 // couldn't find a merge
2299 return NULL;
2300 }
2301 }
2302
2303 // sanity check this feed turns up as the expected slice
2304 assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305
2306 MemBarNode *trailing = NULL;
2307 // be sure we have a trailing membar fed by the merge
2308 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309 x = mm->fast_out(i);
2310 if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311 trailing = x->as_MemBar();
2312 break;
2313 }
2314 }
2315
2316 return trailing;
2317 }
2318
2319 // trailing_to_card_mark
2320 //
2321 // graph traversal helper which detects extra, non-normal Mem feed
2322 // from a trailing volatile membar to a preceding card mark volatile
2323 // membar i.e. it identifies whether one of the three possible extra
2324 // GC post-write Mem flow subgraphs is present
2325 //
2326 // this predicate checks for the same flow as the previous predicate
2327 // but starting from the bottom rather than the top.
2328 //
2329 // if the configuration is present returns the card mark membar
2330 // otherwise NULL
2331 //
2332 // n.b. the supplied membar is expected to be a trailing
2333 // MemBarVolatile i.e. the caller must ensure the input node has the
2334 // correct opcode
2335
2336 MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2337 {
2338 assert(trailing->Opcode() == Op_MemBarVolatile,
2339 "expecting a volatile membar");
2340 assert(!is_card_mark_membar(trailing),
2341 "not expecting a card mark membar");
2342
2343 // the Mem feed to the membar should be a merge
2344 Node *x = trailing->in(TypeFunc::Memory);
2345 if (!x->is_MergeMem()) {
2346 return NULL;
2347 }
2348
2349 MergeMemNode *mm = x->as_MergeMem();
2350
2351 x = mm->in(Compile::AliasIdxBot);
2352 // with G1 we may possibly see a Phi or two before we see a Memory
2353 // Proj from the card mark membar
2354
2355 const int MAX_PHIS = 3; // max phis we will search through
2356 int phicount = 0; // current search count
2357
2358 bool retry_feed = !x->is_Proj();
2359
2360 while (retry_feed) {
2361 if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2362 PhiNode *phi = x->as_Phi();
2363 ProjNode *proj = NULL;
2364 PhiNode *nextphi = NULL;
2365 bool found_leading = false;
2366 for (uint i = 1; i < phi->req(); i++) {
2367 x = phi->in(i);
2380 }
2381 // if we found a correct looking proj then retry from there
2382 // otherwise we must see a leading membar and a phi or this is the
2383 // wrong config
2384 if (proj != NULL) {
2385 x = proj;
2386 retry_feed = false;
2387 } else if (found_leading && nextphi != NULL) {
2388 // retry from this phi to check phi2
2389 x = nextphi;
2390 } else {
2391 // not what we were looking for
2392 return NULL;
2393 }
2394 } else {
2395 return NULL;
2396 }
2397 }
2398 // the proj has to come from the card mark membar
2399 x = x->in(0);
2400 if (!x->is_MemBar()) {
2401 return NULL;
2402 }
2403
2404 MemBarNode *card_mark_membar = x->as_MemBar();
2405
2406 if (!is_card_mark_membar(card_mark_membar)) {
2407 return NULL;
2408 }
2409
2410 return card_mark_membar;
2411 }
2412
2413 // trailing_to_leading
2414 //
2415 // graph traversal helper which checks the Mem flow up the graph
2416 // from a (non-card mark) trailing membar attempting to locate and
2417 // return an associated leading membar. it first looks for a
2418 // subgraph in the normal configuration (relying on helper
2419 // normal_to_leading). failing that it then looks for one of the
2420 // possible post-write card mark subgraphs linking the trailing node
2421 // to the card mark membar (relying on helper
2422 // trailing_to_card_mark), and then checks that the card mark membar
2423 // is fed by a leading membar (once again relying on auxiliary
2424 // predicate normal_to_leading).
2425 //
2426 // if the configuration is valid returns the cpuorder membar for
2427 // preference or when absent the release membar otherwise NULL.
2428 //
2429 // n.b. the input membar is expected to be either a volatile or
2430 // acquire membar but in the former case must *not* be a card mark
2431 // membar.
2432
2433 MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434 {
2435 assert((trailing->Opcode() == Op_MemBarAcquire ||
2436 trailing->Opcode() == Op_MemBarVolatile),
2437 "expecting an acquire or volatile membar");
2438 assert((trailing->Opcode() != Op_MemBarVolatile ||
2439 !is_card_mark_membar(trailing)),
2440 "not expecting a card mark membar");
2441
2442 MemBarNode *leading = normal_to_leading(trailing);
2443
2444 if (leading) {
2445 return leading;
2446 }
2447
2448 // nothing more to do if this is an acquire
2449 if (trailing->Opcode() == Op_MemBarAcquire) {
2450 return NULL;
2451 }
2452
2453 MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454
2455 if (!card_mark_membar) {
2456 return NULL;
2457 }
2458
2459 return normal_to_leading(card_mark_membar);
2460 }
2461
2462 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466 assert(barrier->is_MemBar(), "expecting a membar");
2467
2468 if (UseBarriersForVolatile) {
2469 // we need to plant a dmb
2470 return false;
2471 }
2472
2473 // a volatile read derived from bytecode (or also from an inlined
2474 // SHA field read via LibraryCallKit::load_field_from_object)
2475 // manifests as a LoadX[mo_acquire] followed by an acquire membar
2476 // with a bogus read dependency on its preceding load. so in those
2477 // cases we will find the load node at the PARMS offset of the
2478 // acquire membar. n.b. there may be an intervening DecodeN node.
2479 //
2480 // a volatile load derived from an inlined unsafe field access
2481 // manifests as a cpuorder membar with Ctl and Mem projections
2482 // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483 // acquire then feeds another cpuorder membar via Ctl and Mem
2484 // projections. The load has no output dependency on these trailing
2485 // membars because subsequent nodes inserted into the graph take
2486 // their control feed from the final cpuorder membar meaning they
2487 // are all ordered after the load.
2488
2489 Node *x = barrier->lookup(TypeFunc::Parms);
2490 if (x) {
2491 // we are starting from an acquire and it has a fake dependency
2492 //
2493 // need to check for
2494 //
2495 // LoadX[mo_acquire]
2496 // { |1 }
2497 // {DecodeN}
2498 // |Parms
2499 // MemBarAcquire*
2500 //
2501 // where * tags node we were passed
2502 // and |k means input k
2503 if (x->is_DecodeNarrowPtr()) {
2504 x = x->in(1);
2505 }
2506
2507 return (x->is_Load() && x->as_Load()->is_acquire());
2508 }
2509
2510 // now check for an unsafe volatile get
2511
2512 // need to check for
2513 //
2514 // MemBarCPUOrder
2515 // || \\
2516 // MemBarAcquire* LoadX[mo_acquire]
2517 // ||
2518 // MemBarCPUOrder
2519 //
2520 // where * tags node we were passed
2521 // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2522
2523 // check for a parent MemBarCPUOrder
2524 ProjNode *ctl;
2525 ProjNode *mem;
2526 MemBarNode *parent = parent_membar(barrier);
2527 if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2528 return false;
2529 ctl = parent->proj_out(TypeFunc::Control);
2530 mem = parent->proj_out(TypeFunc::Memory);
2531 if (!ctl || !mem) {
2532 return false;
2533 }
2534 // ensure the proj nodes both feed a LoadX[mo_acquire]
2535 LoadNode *ld = NULL;
2536 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2537 x = ctl->fast_out(i);
2538 // if we see a load we keep hold of it and stop searching
2539 if (x->is_Load()) {
2540 ld = x->as_Load();
2541 break;
2542 }
2543 }
2544 // it must be an acquiring load
2545 if (ld && ld->is_acquire()) {
2546
2547 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2548 x = mem->fast_out(i);
2549 // if we see the same load we drop it and stop searching
2550 if (x == ld) {
2551 ld = NULL;
2552 break;
2553 }
2554 }
2555 // we must have dropped the load
2556 if (ld == NULL) {
2557 // check for a child cpuorder membar
2558 MemBarNode *child = child_membar(barrier->as_MemBar());
2559 if (child && child->Opcode() == Op_MemBarCPUOrder)
2560 return true;
2561 }
2562 }
2563
2564 // final option for unnecessary membar is that it is a trailing node
2565 // belonging to a CAS
2566
2567 MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2568
2569 return leading != NULL;
2570 }
2571
2572 bool needs_acquiring_load(const Node *n)
2573 {
2574 assert(n->is_Load(), "expecting a load");
2575 if (UseBarriersForVolatile) {
2576 // we use a normal load and a dmb
2577 return false;
2578 }
2579
2580 LoadNode *ld = n->as_Load();
2581
2582 if (!ld->is_acquire()) {
2583 return false;
2584 }
2585
2586 // check if this load is feeding an acquire membar
2587 //
2588 // LoadX[mo_acquire]
2589 // { |1 }
2590 // {DecodeN}
2591 // |Parms
2592 // MemBarAcquire*
2593 //
2594 // where * tags node we were passed
2595 // and |k means input k
2596
2597 Node *start = ld;
2598 Node *mbacq = NULL;
2599
2600 // if we hit a DecodeNarrowPtr we reset the start node and restart
2601 // the search through the outputs
2602 restart:
2603
2604 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2614 }
2615
2616 if (mbacq) {
2617 return true;
2618 }
2619
2620 // now check for an unsafe volatile get
2621
2622 // check if the load's Ctl and Mem feeds come from a MemBarCPUOrder
2623 //
2624 // MemBarCPUOrder
2625 // || \\
2626 // MemBarAcquire* LoadX[mo_acquire]
2627 // ||
2628 // MemBarCPUOrder
2629
2630 MemBarNode *membar;
2631
2632 membar = parent_membar(ld);
2633
2634 if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2635 return false;
2636 }
2637
2638 // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2639
2640 membar = child_membar(membar);
2641
2642 if (!membar || membar->Opcode() != Op_MemBarAcquire) {
2643 return false;
2644 }
2645
2646 membar = child_membar(membar);
2647
2648 if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
2649 return false;
2650 }
2651
2652 return true;
2653 }
2654
2655 bool unnecessary_release(const Node *n)
2656 {
2657 assert((n->is_MemBar() &&
2658 n->Opcode() == Op_MemBarRelease),
2659 "expecting a release membar");
2660
2661 if (UseBarriersForVolatile) {
2662 // we need to plant a dmb
2663 return false;
2664 }
2665
2666 // if there is a dependent CPUOrder barrier then use that as the
2667 // leading
2668
2669 MemBarNode *barrier = n->as_MemBar();
2670 // check for an intervening cpuorder membar
2671 MemBarNode *b = child_membar(barrier);
2672 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673 // ok, so start the check from the dependent cpuorder barrier
2674 barrier = b;
2675 }
2676
2677 // must start with a normal feed
2678 MemBarNode *child_barrier = leading_to_normal(barrier);
2679
2680 if (!child_barrier) {
2681 return false;
2682 }
2683
2684 if (!is_card_mark_membar(child_barrier)) {
2685 // this is the trailing membar and we are done
2686 return true;
2687 }
2688
2689 // must be sure this card mark feeds a trailing membar
2690 MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691 return (trailing != NULL);
2692 }
2693
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696 // assert n->is_MemBar();
2697 if (UseBarriersForVolatile) {
2698 // we need to plant a dmb
2699 return false;
2700 }
2701
2702 MemBarNode *mbvol = n->as_MemBar();
2703
2704 // first we check if this is part of a card mark. if so then we have
2705 // to generate a StoreLoad barrier
2706
2707 if (is_card_mark_membar(mbvol)) {
2708 return false;
2709 }
2710
2711 // ok, if it's not a card mark then we still need to check if it is
2712 // a trailing membar of a volatile put graph.
2713
2714 return (trailing_to_leading(mbvol) != NULL);
2715 }
2716
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718
2719 bool needs_releasing_store(const Node *n)
2720 {
2721 // assert n->is_Store();
2722 if (UseBarriersForVolatile) {
2723 // we use a normal store and dmb combination
2724 return false;
2725 }
2726
2727 StoreNode *st = n->as_Store();
2728
2729 // the store must be marked as releasing
2730 if (!st->is_release()) {
2731 return false;
2732 }
2733
2734 // the store must be fed by a membar
2735
2736 Node *x = st->lookup(StoreNode::Memory);
2737
2738 if (! x || !x->is_Proj()) {
2739 return false;
2740 }
2741
2742 ProjNode *proj = x->as_Proj();
2743
2744 x = proj->lookup(0);
2745
2746 if (!x || !x->is_MemBar()) {
2747 return false;
2748 }
2749
2750 MemBarNode *barrier = x->as_MemBar();
2751
2752 // if the barrier is a release membar or a cpuorder membar fed by a
2753 // release membar then we need to check whether that forms part of a
2754 // volatile put graph.
2755
2756 // reject invalid candidates
2757 if (!leading_membar(barrier)) {
2758 return false;
2759 }
2760
2761 // does this lead a normal subgraph?
2762 MemBarNode *mbvol = leading_to_normal(barrier);
2763
2764 if (!mbvol) {
2765 return false;
2766 }
2767
2768 // all done unless this is a card mark
2769 if (!is_card_mark_membar(mbvol)) {
2770 return true;
2771 }
2772
2773 // we found a card mark -- just make sure we have a trailing barrier
2774
2775 return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781
2782 bool needs_acquiring_load_exclusive(const Node *n)
2783 {
2784 assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2785 if (UseBarriersForVolatile) {
2786 return false;
2787 }
2788
2789 // CAS nodes only ought to turn up in inlined unsafe CAS operations
2790 #ifdef ASSERT
2791 LoadStoreNode *st = n->as_LoadStore();
2792
2793 // the store must be fed by a membar
2794
2795 Node *x = st->lookup(StoreNode::Memory);
2796
2797 assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2798
2799 ProjNode *proj = x->as_Proj();
2800
2801 x = proj->lookup(0);
2802
2803 assert (x && x->is_MemBar(), "CAS not fed by membar!");
2804
2805 MemBarNode *barrier = x->as_MemBar();
2806
2807 // the barrier must be a cpuorder membar fed by a release membar
2808
2809 assert(barrier->Opcode() == Op_MemBarCPUOrder,
2810 "CAS not fed by cpuorder membar!");
2811
2812 MemBarNode *b = parent_membar(barrier);
2813 assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2814 "CAS not fed by cpuorder+release membar pair!");
2815
2816 // does this lead a normal subgraph?
2817 MemBarNode *mbar = leading_to_normal(barrier);
2818
2819 assert(mbar != NULL, "CAS not embedded in normal graph!");
2820
2821 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2822 #endif // ASSERT
2823 // so we can just return true here
2824 return true;
2825 }
2826
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if we can elide the StoreStore barrier (dmb ishst)
2830 // normally planted before the card write otherwise false
2831
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
2835
2836 // we only ever need to generate a dmb ishst between an object put
2837 // and the associated card mark when we are using CMS without
2838 // conditional card marking
2839
2840 if (!UseConcMarkSweepGC || UseCondCardMark) {
2841 return true;
2842 }
2843
2844 // if we are implementing volatile puts using barriers then the
2845 // object put is implemented as an str so we must insert the dmb ishst
2846
2847 if (UseBarriersForVolatile) {
2848 return false;
2849 }
2850
2851 // we can omit the dmb ishst if this StoreCM is part of a volatile
2852 // put because in that case the put will be implemented by stlr
2853 //
2854 // we need to check for a normal subgraph feeding this StoreCM.
2855 // that means the StoreCM must be fed Memory from a leading membar,
2856 // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857 // leading membar must be part of a normal subgraph
2858
2859 Node *x = storecm->in(StoreNode::Memory);
2860
2861 if (!x->is_Proj()) {
2862 return false;
2863 }
2864
2865 x = x->in(0);
2866
2867 if (!x->is_MemBar()) {
2868 return false;
2869 }
2870
2871 MemBarNode *leading = x->as_MemBar();
2872
2873 // reject invalid candidates
2874 if (!leading_membar(leading)) {
2875 return false;
2876 }
2877
2878 // we can omit the StoreStore if it is the head of a normal subgraph
2879 return (leading_to_normal(leading) != NULL);
2880 }
2881
2882
2883 #define __ _masm.
2884
2885 // advance declarations for helper functions to convert register
2886 // indices to register objects
2887
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892
2893 //=============================================================================
2894
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 // from the start of the call to the point where the return address
8787 // n.b. storeIConditional is not used anywhere by AArch64.
8788 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8789 %{
8790 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8791
8792 ins_cost(VOLATILE_REF_COST);
8793
8794 format %{
8795 "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8796 "cmpw rscratch1, zr\t# EQ on successful write"
8797 %}
8798
8799 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8800
8801 ins_pipe(pipe_slow);
8802 %}
8803
8804 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8805 // can't match them
8806
8807 // standard CompareAndSwapX when we are using barriers
8808 // these have higher priority than the rules selected by a predicate
8809
8810 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8811
8812 match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8813 ins_cost(2 * VOLATILE_REF_COST);
8814
8815 effect(KILL cr);
8816
8817 format %{
8818 "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8819 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8820 %}
8821
8822 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8823 aarch64_enc_cset_eq(res));
8824
8825 ins_pipe(pipe_slow);
8826 %}
8827
8828 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8829
8830 match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8831 ins_cost(2 * VOLATILE_REF_COST);
8832
8833 effect(KILL cr);
8834
8835 format %{
8836 "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8837 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8838 %}
8839
8840 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8841 aarch64_enc_cset_eq(res));
8842
8843 ins_pipe(pipe_slow);
8844 %}
8845
8846 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8847
8848 match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8849 ins_cost(2 * VOLATILE_REF_COST);
8850
8851 effect(KILL cr);
8852
8853 format %{
8854 "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8855 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8856 %}
8857
8858 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8859 aarch64_enc_cset_eq(res));
8860
8861 ins_pipe(pipe_slow);
8862 %}
8863
8864 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8865
8866 match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8867 ins_cost(2 * VOLATILE_REF_COST);
8868
8869 effect(KILL cr);
8870
8871 format %{
8872 "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8873 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8874 %}
8875
8876 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8877 aarch64_enc_cset_eq(res));
8878
8879 ins_pipe(pipe_slow);
8880 %}
8881
8882 // alternative CompareAndSwapX when we are eliding barriers
8883
8884 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8885
8886 predicate(needs_acquiring_load_exclusive(n));
8887 match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8888 ins_cost(VOLATILE_REF_COST);
8889
8890 effect(KILL cr);
8891
8892 format %{
8893 "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8894 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8895 %}
8896
8897 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8898 aarch64_enc_cset_eq(res));
8899
8900 ins_pipe(pipe_slow);
8901 %}
8902
8903 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8904
8905 predicate(needs_acquiring_load_exclusive(n));
8906 match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8907 ins_cost(VOLATILE_REF_COST);
8908
8909 effect(KILL cr);
8910
8911 format %{
8912 "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8913 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8914 %}
8915
8916 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8917 aarch64_enc_cset_eq(res));
8918
8919 ins_pipe(pipe_slow);
8920 %}
8921
8922 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8923
8924 predicate(needs_acquiring_load_exclusive(n));
8925 match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8926 ins_cost(VOLATILE_REF_COST);
8927
8928 effect(KILL cr);
8929
8930 format %{
8931 "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8932 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8933 %}
8934
8935 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8936 aarch64_enc_cset_eq(res));
8937
8938 ins_pipe(pipe_slow);
8939 %}
8940
8941 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8942
8943 predicate(needs_acquiring_load_exclusive(n));
8944 match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8945 ins_cost(VOLATILE_REF_COST);
8946
8947 effect(KILL cr);
8948
8949 format %{
8950 "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8951 "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8952 %}
8953
8954 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8955 aarch64_enc_cset_eq(res));
8956
8957 ins_pipe(pipe_slow);
8958 %}
8959
8960
8961 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
8962 match(Set prev (GetAndSetI mem newv));
8963 format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
8964 ins_encode %{
8965 __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
8966 %}
8967 ins_pipe(pipe_serial);
8968 %}
8969
8970 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
8971 match(Set prev (GetAndSetL mem newv));
8972 format %{ "atomic_xchg $prev, $newv, [$mem]" %}
8973 ins_encode %{
8974 __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
|