< prev index next >

src/cpu/aarch64/vm/aarch64.ad

Print this page
rev 8967 : 8080293: AARCH64: Remove unnecessary dmbs from generated CAS code
Summary: The current encoding for CAS generates unnecessary leading and trailing dmbs for the MemBarAcquire and MemBarRelease which ought to be elided
Reviewed-by: kvn

*** 1037,1046 **** --- 1037,1047 ---- MemBarNode *parent_membar(const Node *n); MemBarNode *child_membar(const MemBarNode *n); bool leading_membar(const MemBarNode *barrier); bool is_card_mark_membar(const MemBarNode *barrier); + bool is_CAS(int opcode); MemBarNode *leading_to_normal(MemBarNode *leading); MemBarNode *normal_to_leading(const MemBarNode *barrier); MemBarNode *card_mark_to_trailing(const MemBarNode *barrier); MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
*** 1055,1064 **** --- 1056,1068 ---- bool unnecessary_release(const Node *barrier); bool unnecessary_volatile(const Node *barrier); bool needs_releasing_store(const Node *store); + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_exclusive(const Node *load); + // predicate controlling translation of StoreCM bool unnecessary_storestore(const Node *storecm); %} source %{
*** 1086,1104 **** // // dmb ish // str<x> // dmb ish // // In order to generate the desired instruction sequence we need to // be able to identify specific 'signature' ideal graph node // sequences which i) occur as a translation of a volatile reads or ! // writes and ii) do not occur through any other translation or ! // graph transformation. We can then provide alternative aldc ! // matching rules which translate these node sequences to the ! // desired machine code sequences. Selection of the alternative ! // rules can be implemented by predicates which identify the ! // relevant node sequences. // // The ideal graph generator translates a volatile read to the node // sequence // // LoadX[mo_acquire] --- 1090,1151 ---- // // dmb ish // str<x> // dmb ish // + // We can also use ldaxr and stlxr to implement compare and swap CAS + // sequences. These are normally translated to an instruction + // sequence like the following + // + // dmb ish + // retry: + // ldxr<x> rval raddr + // cmp rval rold + // b.ne done + // stlxr<x> rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // dmb ishld + // + // Note that the exclusive store is already using an stlxr + // instruction. That is required to ensure visibility to other + // threads of the exclusive write (assuming it succeeds) before that + // of any subsequent writes. + // + // The following instruction sequence is an improvement on the above + // + // retry: + // ldaxr<x> rval raddr + // cmp rval rold + // b.ne done + // stlxr<x> rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // + // We don't need the leading dmb ish since the stlxr guarantees + // visibility of prior writes in the case that the swap is + // successful. Crucially we don't have to worry about the case where + // the swap is not successful since no valid program should be + // relying on visibility of prior changes by the attempting thread + // in the case where the CAS fails. 
+ // + // Similarly, we don't need the trailing dmb ishld if we substitute + // an ldaxr instruction since that will provide all the guarantees we + // require regarding observation of changes made by other threads + // before any change to the CAS address observed by the load. + // // In order to generate the desired instruction sequence we need to // be able to identify specific 'signature' ideal graph node // sequences which i) occur as a translation of a volatile reads or ! // writes or CAS operations and ii) do not occur through any other ! // translation or graph transformation. We can then provide ! // alternative aldc matching rules which translate these node ! // sequences to the desired machine code sequences. Selection of the ! // alternative rules can be implemented by predicates which identify ! // the relevant node sequences. // // The ideal graph generator translates a volatile read to the node // sequence // // LoadX[mo_acquire]
*** 1161,1170 **** --- 1208,1226 ---- // sequence of membar nodes. Similarly, given an acquire membar we // can know that it was added because of an inlined unsafe volatile // get if it is fed and feeds a cpuorder membar and if its feed // membar also feeds an acquiring load. // + // Finally an inlined (Unsafe) CAS operation is translated to the + // following ideal graph + // + // MemBarRelease + // MemBarCPUOrder + // CompareAndSwapX {CardMark}-optional + // MemBarCPUOrder + // MemBarAcquire + // // So, where we can identify these volatile read and write // signatures we can choose to plant either of the above two code // sequences. For a volatile read we can simply plant a normal // ldr<x> and translate the MemBarAcquire to a dmb. However, we can // also choose to inhibit translation of the MemBarAcquire and
*** 1175,1184 **** --- 1231,1248 ---- // normal str<x> and then a dmb ish for the MemBarVolatile. // Alternatively, we can inhibit translation of the MemBarRelease // and MemBarVolatile and instead plant a simple stlr<x> // instruction. // + // when we recognise a CAS signature we can choose to plant a dmb + // ish as a translation for the MemBarRelease, the conventional + // macro-instruction sequence for the CompareAndSwap node (which + // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire. + // Alternatively, we can elide generation of the dmb instructions + // and plant the alternative CompareAndSwap macro-instruction + // sequence (which uses ldaxr<x>). + // // Of course, the above only applies when we see these signature // configurations. We still want to plant dmb instructions in any // other cases where we may see a MemBarAcquire, MemBarRelease or // MemBarVolatile. For example, at the end of a constructor which // writes final/volatile fields we will see a MemBarRelease
*** 1192,1202 **** // always just translate the loads and stores to ldr<x> and str<x> // and translate acquire, release and volatile membars to the // relevant dmb instructions. // ! // graph traversal helpers used for volatile put/get optimization // 1) general purpose helpers // if node n is linked to a parent MemBarNode by an intervening // Control and Memory ProjNode return the MemBarNode otherwise return --- 1256,1267 ---- // always just translate the loads and stores to ldr<x> and str<x> // and translate acquire, release and volatile membars to the // relevant dmb instructions. // ! // graph traversal helpers used for volatile put/get and CAS ! // optimization // 1) general purpose helpers // if node n is linked to a parent MemBarNode by an intervening // Control and Memory ProjNode return the MemBarNode otherwise return
*** 1218,1237 **** mem = n->lookup(TypeFunc::Memory); } else { return NULL; } ! if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) return NULL; membar = ctl->lookup(0); ! if (!membar || !membar->is_MemBar()) return NULL; ! if (mem->lookup(0) != membar) return NULL; return membar->as_MemBar(); } // if n is linked to a child MemBarNode by intervening Control and --- 1283,1305 ---- mem = n->lookup(TypeFunc::Memory); } else { return NULL; } ! if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) { return NULL; + } membar = ctl->lookup(0); ! if (!membar || !membar->is_MemBar()) { return NULL; + } ! if (mem->lookup(0) != membar) { return NULL; + } return membar->as_MemBar(); } // if n is linked to a child MemBarNode by intervening Control and
*** 1257,1268 **** child = x->as_MemBar(); break; } } ! if (child == NULL) return NULL; for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); // if we see a membar we keep hold of it. we may also see a new // arena copy of the original but it will appear later --- 1325,1337 ---- child = x->as_MemBar(); break; } } ! if (child == NULL) { return NULL; + } for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); // if we see a membar we keep hold of it. we may also see a new // arena copy of the original but it will appear later
*** 1281,1299 **** bool leading_membar(const MemBarNode *barrier) { int opcode = barrier->Opcode(); // if this is a release membar we are ok ! if (opcode == Op_MemBarRelease) return true; // if its a cpuorder membar . . . ! if (opcode != Op_MemBarCPUOrder) return false; // then the parent has to be a release membar MemBarNode *parent = parent_membar(barrier); ! if (!parent) return false; opcode = parent->Opcode(); return opcode == Op_MemBarRelease; } // 2) card mark detection helper --- 1350,1371 ---- bool leading_membar(const MemBarNode *barrier) { int opcode = barrier->Opcode(); // if this is a release membar we are ok ! if (opcode == Op_MemBarRelease) { return true; + } // if its a cpuorder membar . . . ! if (opcode != Op_MemBarCPUOrder) { return false; + } // then the parent has to be a release membar MemBarNode *parent = parent_membar(barrier); ! if (!parent) { return false; + } opcode = parent->Opcode(); return opcode == Op_MemBarRelease; } // 2) card mark detection helper
*** 1312,1326 **** // // iii) the node's Mem projection feeds a StoreCM node. bool is_card_mark_membar(const MemBarNode *barrier) { ! if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) return false; ! if (barrier->Opcode() != Op_MemBarVolatile) return false; ProjNode *mem = barrier->proj_out(TypeFunc::Memory); for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) { Node *y = mem->fast_out(i); --- 1384,1400 ---- // // iii) the node's Mem projection feeds a StoreCM node. bool is_card_mark_membar(const MemBarNode *barrier) { ! if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) { return false; + } ! if (barrier->Opcode() != Op_MemBarVolatile) { return false; + } ProjNode *mem = barrier->proj_out(TypeFunc::Memory); for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) { Node *y = mem->fast_out(i);
*** 1331,1342 **** return false; } ! // 3) helper predicates to traverse volatile put graphs which may ! // contain GC barrier subgraphs // Preamble // -------- // // for volatile writes we can omit generating barriers and employ a --- 1405,1416 ---- return false; } ! // 3) helper predicates to traverse volatile put or CAS graphs which ! // may contain GC barrier subgraphs // Preamble // -------- // // for volatile writes we can omit generating barriers and employ a
*** 1402,1413 **** // // It is also possible to perform the card mark conditionally on it // currently being unmarked in which case the volatile put graph // will look slightly different // ! // MemBarRelease ! // MemBarCPUOrder___________________________________________ // || \\ Ctl \ Ctl \ \\ Mem \ // || StoreN/P[mo_release] CastP2X If LoadB | // | \ / \ | // | MergeMem . . . StoreB // | / / --- 1476,1486 ---- // // It is also possible to perform the card mark conditionally on it // currently being unmarked in which case the volatile put graph // will look slightly different // ! // MemBarRelease____________________________________________ // || \\ Ctl \ Ctl \ \\ Mem \ // || StoreN/P[mo_release] CastP2X If LoadB | // | \ / \ | // | MergeMem . . . StoreB // | / /
*** 1417,1427 **** // It is worth noting at this stage that both the above // configurations can be uniquely identified by checking that the // memory flow includes the following subgraph: // // MemBarRelease ! // MemBarCPUOrder // | \ . . . // | StoreX[mo_release] . . . // | / // MergeMem // | --- 1490,1500 ---- // It is worth noting at this stage that both the above // configurations can be uniquely identified by checking that the // memory flow includes the following subgraph: // // MemBarRelease ! // {MemBarCPUOrder} // | \ . . . // | StoreX[mo_release] . . . // | / // MergeMem // |
*** 1429,1440 **** // // This is referred to as a *normal* subgraph. It can easily be // detected starting from any candidate MemBarRelease, // StoreX[mo_release] or MemBarVolatile. // // the code below uses two helper predicates, leading_to_normal and ! // normal_to_leading to identify this configuration, one validating // the layout starting from the top membar and searching down and // the other validating the layout starting from the lower membar // and searching up. // // There are two special case GC configurations when a normal graph --- 1502,1553 ---- // // This is referred to as a *normal* subgraph. It can easily be // detected starting from any candidate MemBarRelease, // StoreX[mo_release] or MemBarVolatile. // + // A simple variation on this normal case occurs for an unsafe CAS + // operation. The basic graph for a non-object CAS is + // + // MemBarRelease + // || + // MemBarCPUOrder + // || \\ . . . + // || CompareAndSwapX + // || | + // || SCMemProj + // | \ / + // | MergeMem + // | / + // MemBarCPUOrder + // || + // MemBarAcquire + // + // The same basic variations on this arrangement (mutatis mutandis) + // occur when a card mark is introduced. i.e. we see the same basic + // shape but the StoreP/N is replaced with CompareAndSwapP/N and the + // tail of the graph is a pair comprising a MemBarCPUOrder + + // MemBarAcquire. + // + // So, in the case of a CAS the normal graph has the variant form + // + // MemBarRelease + // MemBarCPUOrder + // | \ . . . + // | CompareAndSwapX . . . + // | | + // | SCMemProj + // | / . . . + // MergeMem + // | + // MemBarCPUOrder + // MemBarAcquire + // + // This graph can also easily be detected starting from any + // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire. + // + // the code below uses two helper predicates, leading_to_normal and ! 
// normal_to_leading to identify these normal graphs, one validating // the layout starting from the top membar and searching down and // the other validating the layout starting from the lower membar // and searching up. // // There are two special case GC configurations when a normal graph
*** 1448,1458 **** // employs a post-write GC barrier while G1 employs both a pre- and // post-write GC barrier. Of course the extra nodes may be absent -- // they are only inserted for object puts. This significantly // complicates the task of identifying whether a MemBarRelease, // StoreX[mo_release] or MemBarVolatile forms part of a volatile put ! // when using these GC configurations (see below). // // In both cases the post-write subtree includes an auxiliary // MemBarVolatile (StoreLoad barrier) separating the object put and // the read of the corresponding card. This poses two additional // problems. --- 1561,1573 ---- // employs a post-write GC barrier while G1 employs both a pre- and // post-write GC barrier. Of course the extra nodes may be absent -- // they are only inserted for object puts. This significantly // complicates the task of identifying whether a MemBarRelease, // StoreX[mo_release] or MemBarVolatile forms part of a volatile put ! // when using these GC configurations (see below). It adds similar ! // complexity to the task of identifying whether a MemBarRelease, ! // CompareAndSwapX or MemBarAcquire forms part of a CAS. // // In both cases the post-write subtree includes an auxiliary // MemBarVolatile (StoreLoad barrier) separating the object put and // the read of the corresponding card. This poses two additional // problems.
*** 1487,1497 **** // A CMS GC post-barrier wraps its card write (StoreCM) inside an If // which selects conditonal execution based on the value loaded // (LoadB) from the card. Ctl and Mem are fed to the If via an // intervening StoreLoad barrier (MemBarVolatile). // ! // So, with CMS we may see a node graph which looks like this // // MemBarRelease // MemBarCPUOrder_(leading)__________________ // C | M \ \\ C \ // | \ StoreN/P[mo_release] CastP2X --- 1602,1613 ---- // A CMS GC post-barrier wraps its card write (StoreCM) inside an If // which selects conditonal execution based on the value loaded // (LoadB) from the card. Ctl and Mem are fed to the If via an // intervening StoreLoad barrier (MemBarVolatile). // ! // So, with CMS we may see a node graph for a volatile object store ! // which looks like this // // MemBarRelease // MemBarCPUOrder_(leading)__________________ // C | M \ \\ C \ // | \ StoreN/P[mo_release] CastP2X
*** 1522,1531 **** --- 1638,1696 ---- // card mark membar. The trailing MergeMem merges the AliasIdxBot // Mem slice from the card mark membar and the AliasIdxRaw slice // from the StoreCM into the trailing membar (n.b. the latter // proceeds via a Phi associated with the If region). // + // The graph for a CAS varies slightly, the obvious difference being + // that the StoreN/P node is replaced by a CompareAndSwapP/N node + // and the trailing MemBarVolatile by a MemBarCPUOrder + + // MemBarAcquire pair. The other important difference is that the + // CompareAndSwap node's SCMemProj is not merged into the card mark + // membar - it still feeds the trailing MergeMem. This also means + // that the card mark membar receives its Mem feed directly from the + // leading membar rather than via a MergeMem. + // + // MemBarRelease + // MemBarCPUOrder__(leading)_________________________ + // || \\ C \ + // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X + // C | || M | | + // | LoadB | ______/| + // | | | / | + // | Cmp | / SCMemProj + // | / | / | + // If | / / + // | \ | / / + // IfFalse IfTrue | / / + // \ / \ |/ prec / + // \ / StoreCM / + // \ / | / + // Region . . . / + // | \ / + // | . . . \ / Bot + // | MergeMem + // | | + // MemBarCPUOrder + // MemBarAcquire (trailing) + // + // This has a slightly different memory subgraph to the one seen + // previously but the core of it is the same as for the CAS normal + // sungraph + // + // MemBarRelease + // MemBarCPUOrder____ + // || \ . . . + // MemBarVolatile CompareAndSwapX . . . + // | \ | + // . . . SCMemProj + // | / . . . + // MergeMem + // | + // MemBarCPUOrder + // MemBarAcquire + // + // // G1 is quite a lot more complicated. The nodes inserted on behalf // of G1 may comprise: a pre-write graph which adds the old value to // the SATB queue; the releasing store itself; and, finally, a // post-write graph which performs a card mark. //
*** 1573,1588 **** // MemBarVolatile (trailing) // // n.b. the LoadB in this subgraph is not the card read -- it's a // read of the SATB queue active flag. // // The G1 post-write subtree is also optional, this time when the // new value being written is either null or can be identified as a // newly allocated (young gen) object with no intervening control // flow. The latter cannot happen but the former may, in which case ! // the card mark membar is omitted and the memory feeds from the ! // leading membar and the StoreN/P are merged direct into the // trailing membar as per the normal subgraph. So, the only special // case which arises is when the post-write subgraph is generated. // // The kernel of the post-write G1 subgraph is the card mark itself // which includes a card mark memory barrier (MemBarVolatile), a --- 1738,1757 ---- // MemBarVolatile (trailing) // // n.b. the LoadB in this subgraph is not the card read -- it's a // read of the SATB queue active flag. // + // Once again the CAS graph is a minor variant on the above with the + // expected substitutions of CompareAndSwapX for StoreN/P and + // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile. + // // The G1 post-write subtree is also optional, this time when the // new value being written is either null or can be identified as a // newly allocated (young gen) object with no intervening control // flow. The latter cannot happen but the former may, in which case ! // the card mark membar is omitted and the memory feeds from the ! // leading membar and the StoreN/P are merged direct into the // trailing membar as per the normal subgraph. So, the only special // case which arises is when the post-write subgraph is generated. // // The kernel of the post-write G1 subgraph is the card mark itself // which includes a card mark memory barrier (MemBarVolatile), a
*** 1666,1791 **** // membar. Each Phi corresponds to one of the Ifs which may skip // around the card mark membar. So when the If implementing the NULL // value check has been elided the total number of Phis is 2 // otherwise it is 3. // // So, the upshot is that in all cases the volatile put graph will // include a *normal* memory subgraph betwen the leading membar and ! // its child membar. When that child is not a card mark membar then ! // it marks the end of a volatile put subgraph. If the child is a ! // card mark membar then the normal subgraph will form part of a ! // volatile put subgraph if and only if the child feeds an ! // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That ! // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging ! // the leading barrier memory flow (for G1). // // The predicates controlling generation of instructions for store // and barrier nodes employ a few simple helper functions (described ! // below) which identify the presence or absence of these subgraph ! // configurations and provide a means of traversing from one node in ! // the subgraph to another. // leading_to_normal // ! //graph traversal helper which detects the normal case Mem feed ! // from a release membar (or, optionally, its cpuorder child) to a ! // dependent volatile membar i.e. it ensures that the following Mem ! // flow subgraph is present. // // MemBarRelease ! // MemBarCPUOrder // | \ . . . // | StoreN/P[mo_release] . . . // | / // MergeMem // | ! // MemBarVolatile // ! // if the correct configuration is present returns the volatile // membar otherwise NULL. // // the input membar is expected to be either a cpuorder membar or a // release membar. in the latter case it should not have a cpu membar // child. // ! // the returned membar may be a card mark membar rather than a ! // trailing membar. 
MemBarNode *leading_to_normal(MemBarNode *leading) { assert((leading->Opcode() == Op_MemBarRelease || leading->Opcode() == Op_MemBarCPUOrder), "expecting a volatile or cpuroder membar!"); // check the mem flow ProjNode *mem = leading->proj_out(TypeFunc::Memory); ! if (!mem) return NULL; Node *x = NULL; StoreNode * st = NULL; MergeMemNode *mm = NULL; for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); if (x->is_MergeMem()) { ! if (mm != NULL) return NULL; // two merge mems is one too many mm = x->as_MergeMem(); } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { ! // two releasing stores is one too many ! if (st != NULL) return NULL; st = x->as_Store(); } } ! if (!mm || !st) return NULL; ! bool found = false; ! // ensure the store feeds the merge for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { if (st->fast_out(i) == mm) { ! found = true; break; } } ! ! if (!found) return NULL; ! MemBarNode *mbvol = NULL; ! // ensure the merge feeds a volatile membar for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { x = mm->fast_out(i); ! if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { ! mbvol = x->as_MemBar(); break; } } ! return mbvol; } // normal_to_leading // // graph traversal helper which detects the normal case Mem feed // from either a card mark or a trailing membar to a preceding // release membar (optionally its cpuorder child) i.e. it ensures ! // that the following Mem flow subgraph is present. // // MemBarRelease // MemBarCPUOrder {leading} // | \ . . . // | StoreN/P[mo_release] . . . // | / // MergeMem // | ! // MemBarVolatile // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. // // if the configuration is present returns the cpuorder member for --- 1835,2058 ---- // membar. Each Phi corresponds to one of the Ifs which may skip // around the card mark membar. 
So when the If implementing the NULL // value check has been elided the total number of Phis is 2 // otherwise it is 3. // + // The CAS graph when using G1GC also includes a pre-write subgraph + // and an optional post-write subgraph. The same variations are + // introduced as for CMS with conditional card marking i.e. the + // StoreP/N is swapped for a CompareAndSwapP/N, the trailing + // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the + // Mem feed from the CompareAndSwapP/N includes a precedence + // dependency feed to the StoreCM and a feed via an SCMemProj to the + // trailing membar. So, as before the configuration includes the + // normal CAS graph as a subgraph of the memory flow. + // // So, the upshot is that in all cases the volatile put graph will // include a *normal* memory subgraph betwen the leading membar and ! // its child membar, either a volatile put graph (including a ! // releasing StoreX) or a CAS graph (including a CompareAndSwapX). ! // When that child is not a card mark membar then it marks the end ! // of the volatile put or CAS subgraph. If the child is a card mark ! // membar then the normal subgraph will form part of a volatile put ! // subgraph if and only if the child feeds an AliasIdxBot Mem feed ! // to a trailing barrier via a MergeMem. That feed is either direct ! // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier ! // memory flow (for G1). // // The predicates controlling generation of instructions for store // and barrier nodes employ a few simple helper functions (described ! // below) which identify the presence or absence of all these ! // subgraph configurations and provide a means of traversing from ! // one node in the subgraph to another. ! ! // is_CAS(int opcode) ! // ! // return true if opcode is one of the possible CompareAndSwapX ! // values otherwise false. ! ! bool is_CAS(int opcode) ! { ! return (opcode == Op_CompareAndSwapI || ! opcode == Op_CompareAndSwapL || ! 
opcode == Op_CompareAndSwapN || ! opcode == Op_CompareAndSwapP); ! } // leading_to_normal // ! //graph traversal helper which detects the normal case Mem feed from ! // a release membar (or, optionally, its cpuorder child) to a ! // dependent volatile membar i.e. it ensures that one or other of ! // the following Mem flow subgraph is present. // // MemBarRelease ! // MemBarCPUOrder {leading} // | \ . . . // | StoreN/P[mo_release] . . . // | / // MergeMem // | ! // MemBarVolatile {trailing or card mark} ! // ! // MemBarRelease ! // MemBarCPUOrder {leading} ! // | \ . . . ! // | CompareAndSwapX . . . ! // | ! // . . . SCMemProj ! // \ | ! // | MergeMem ! // | / ! // MemBarCPUOrder ! // MemBarAcquire {trailing} // ! // if the correct configuration is present returns the trailing // membar otherwise NULL. // // the input membar is expected to be either a cpuorder membar or a // release membar. in the latter case it should not have a cpu membar // child. // ! // the returned value may be a card mark or trailing membar ! // MemBarNode *leading_to_normal(MemBarNode *leading) { assert((leading->Opcode() == Op_MemBarRelease || leading->Opcode() == Op_MemBarCPUOrder), "expecting a volatile or cpuroder membar!"); // check the mem flow ProjNode *mem = leading->proj_out(TypeFunc::Memory); ! if (!mem) { return NULL; + } Node *x = NULL; StoreNode * st = NULL; + LoadStoreNode *cas = NULL; MergeMemNode *mm = NULL; for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); if (x->is_MergeMem()) { ! if (mm != NULL) { return NULL; + } // two merge mems is one too many mm = x->as_MergeMem(); } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { ! // two releasing stores/CAS nodes is one too many ! if (st != NULL || cas != NULL) { return NULL; + } st = x->as_Store(); + } else if (is_CAS(x->Opcode())) { + if (st != NULL || cas != NULL) { + return NULL; + } + cas = x->as_LoadStore(); } } ! // must have a store or a cas ! 
if (!st && !cas) { return NULL; + } ! // must have a merge if we also have st ! if (st && !mm) { ! return NULL; ! } ! ! Node *y = NULL; ! if (cas) { ! // look for an SCMemProj ! for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) { ! x = cas->fast_out(i); ! if (x->is_Proj()) { ! y = x; ! break; ! } ! } ! if (y == NULL) { ! return NULL; ! } ! // the proj must feed a MergeMem ! for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) { ! x = y->fast_out(i); ! if (x->is_MergeMem()) { ! mm = x->as_MergeMem(); ! break; ! } ! } ! if (mm == NULL) ! return NULL; ! } else { ! // ensure the store feeds the existing mergemem; for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { if (st->fast_out(i) == mm) { ! y = st; break; } } ! if (y == NULL) { return NULL; + } + } ! MemBarNode *mbar = NULL; ! // ensure the merge feeds to the expected type of membar for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { x = mm->fast_out(i); ! if (x->is_MemBar()) { ! int opcode = x->Opcode(); ! if (opcode == Op_MemBarVolatile && st) { ! mbar = x->as_MemBar(); ! } else if (cas && opcode == Op_MemBarCPUOrder) { ! MemBarNode *y = x->as_MemBar(); ! y = child_membar(y); ! if (y != NULL && y->Opcode() == Op_MemBarAcquire) { ! mbar = y; ! } ! } break; } } ! return mbar; } // normal_to_leading // // graph traversal helper which detects the normal case Mem feed // from either a card mark or a trailing membar to a preceding // release membar (optionally its cpuorder child) i.e. it ensures ! // that one or other of the following Mem flow subgraphs is present. // // MemBarRelease // MemBarCPUOrder {leading} // | \ . . . // | StoreN/P[mo_release] . . . // | / // MergeMem // | ! // MemBarVolatile {card mark or trailing} ! // ! // MemBarRelease ! // MemBarCPUOrder {leading} ! // | \ . . . ! // | CompareAndSwapX . . . ! // | ! // . . . SCMemProj ! // \ | ! // | MergeMem ! // | / ! // MemBarCPUOrder ! 
// MemBarAcquire {trailing} // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. // // if the configuration is present returns the cpuorder member for
*** 1795,1850 **** // need not be a card mark membar. MemBarNode *normal_to_leading(const MemBarNode *barrier) { // input must be a volatile membar ! assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar"); Node *x; // the Mem feed to the membar should be a merge ! x = barrier->in(TypeFunc::Memory); if (!x->is_MergeMem()) return NULL; MergeMemNode *mm = x->as_MergeMem(); ! // the AliasIdxBot slice should be another MemBar projection x = mm->in(Compile::AliasIdxBot); // ensure this is a non control projection ! if (!x->is_Proj() || x->is_CFG()) return NULL; // if it is fed by a membar that's the one we want x = x->in(0); ! if (!x->is_MemBar()) return NULL; MemBarNode *leading = x->as_MemBar(); // reject invalid candidates ! if (!leading_membar(leading)) return NULL; ! // ok, we have a leading ReleaseMembar, now for the sanity clauses ! // the leading membar must feed Mem to a releasing store ProjNode *mem = leading->proj_out(TypeFunc::Memory); StoreNode *st = NULL; for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { st = x->as_Store(); ! break; } } ! if (st == NULL) return NULL; ! // the releasing store has to feed the same merge for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { ! if (st->fast_out(i) == mm) return leading; } return NULL; } // card_mark_to_trailing --- 2062,2182 ---- // need not be a card mark membar. MemBarNode *normal_to_leading(const MemBarNode *barrier) { // input must be a volatile membar ! assert((barrier->Opcode() == Op_MemBarVolatile || ! barrier->Opcode() == Op_MemBarAcquire), ! 
"expecting a volatile or an acquire membar"); Node *x; + bool is_cas = barrier->Opcode() == Op_MemBarAcquire; + + // if we have an acquire membar then it must be fed via a CPUOrder + // membar + + if (is_cas) { + // skip to parent barrier which must be a cpuorder + x = parent_membar(barrier); + if (x->Opcode() != Op_MemBarCPUOrder) + return NULL; + } else { + // start from the supplied barrier + x = (Node *)barrier; + } // the Mem feed to the membar should be a merge ! x = x ->in(TypeFunc::Memory); if (!x->is_MergeMem()) return NULL; MergeMemNode *mm = x->as_MergeMem(); ! if (is_cas) { ! // the merge should be fed from the CAS via an SCMemProj node ! x = NULL; ! for (uint idx = 1; idx < mm->req(); idx++) { ! if (mm->in(idx)->Opcode() == Op_SCMemProj) { ! x = mm->in(idx); ! break; ! } ! } ! if (x == NULL) { ! return NULL; ! } ! // check for a CAS feeding this proj ! x = x->in(0); ! int opcode = x->Opcode(); ! if (!is_CAS(opcode)) { ! return NULL; ! } ! // the CAS should get its mem feed from the leading membar ! x = x->in(MemNode::Memory); ! } else { ! // the merge should get its Bottom mem feed from the leading membar x = mm->in(Compile::AliasIdxBot); + } + // ensure this is a non control projection ! if (!x->is_Proj() || x->is_CFG()) { return NULL; + } // if it is fed by a membar that's the one we want x = x->in(0); ! if (!x->is_MemBar()) { return NULL; + } MemBarNode *leading = x->as_MemBar(); // reject invalid candidates ! if (!leading_membar(leading)) { return NULL; + } ! // ok, we have a leading membar, now for the sanity clauses ! 
// the leading membar must feed Mem to a releasing store or CAS
  ProjNode *mem = leading->proj_out(TypeFunc::Memory);
  StoreNode *st = NULL;
+ LoadStoreNode *cas = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
  x = mem->fast_out(i);
  if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
+ // two stores or CASes is one too many
+ if (st != NULL || cas != NULL) {
+ return NULL;
+ }
  st = x->as_Store();
! } else if (is_CAS(x->Opcode())) {
! if (st != NULL || cas != NULL) {
! return NULL;
! }
! cas = x->as_LoadStore();
  }
  }
!
! // we must have found at least one of a store or a cas
! if (st == NULL & cas == NULL) {
  return NULL;
+ }
! if (st == NULL) {
! // nothing more to check
! return leading;
! } else {
! // we should not have a store if we started from an acquire
! if (is_cas) {
! return NULL;
! }
!
! // the store should feed the merge we used to get here
  for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
! if (st->fast_out(i) == mm) { return leading; }
+ }
+ }
  return NULL;
  }
  // card_mark_to_trailing
*** 1863,1874 **** // | | // | . . . // Bot | / // MergeMem // | ! // MemBarVolatile (trailing) ! // // // 2) // MemBarRelease/CPUOrder (leading) // | // | --- 2195,2206 ---- // | | // | . . . // Bot | / // MergeMem // | ! // | ! // MemBarVolatile {trailing} // // 2) // MemBarRelease/CPUOrder (leading) // | // |
*** 1882,1892 **** // \ / // Phi . . . // Bot | / // MergeMem // | ! // MemBarVolatile (trailing) // // 3) // MemBarRelease/CPUOrder (leading) // | // |\ --- 2214,2225 ---- // \ / // Phi . . . // Bot | / // MergeMem // | ! // MemBarVolatile {trailing} ! // // // 3) // MemBarRelease/CPUOrder (leading) // | // |\
*** 1903,1913 **** // \ / // Phi . . . // Bot | / // MergeMem // | ! // MemBarVolatile (trailing) // // configuration 1 is only valid if UseConcMarkSweepGC && // UseCondCardMark // // configurations 2 and 3 are only valid if UseG1GC. --- 2236,2247 ---- // \ / // Phi . . . // Bot | / // MergeMem // | ! // | ! // MemBarVolatile {trailing} // // configuration 1 is only valid if UseConcMarkSweepGC && // UseCondCardMark // // configurations 2 and 3 are only valid if UseG1GC.
*** 1953,1964 **** if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) { phi = x->as_Phi(); break; } } ! if (!phi) return NULL; // look for another merge below this phi feed = phi; } else { // couldn't find a merge return NULL; --- 2287,2299 ---- if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) { phi = x->as_Phi(); break; } } ! if (!phi) { return NULL; + } // look for another merge below this phi feed = phi; } else { // couldn't find a merge return NULL;
*** 1967,1977 ****
  // sanity check this feed turns up as the expected slice
  assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed,
  "expecting membar to feed AliasIdxBot slice to Merge");
  MemBarNode *trailing = NULL;
! // be sure we have a volatile membar below the merge
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
  x = mm->fast_out(i);
  if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
  trailing = x->as_MemBar();
  break;
--- 2302,2312 ----
  // sanity check this feed turns up as the expected slice
  assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed,
  "expecting membar to feed AliasIdxBot slice to Merge");
  MemBarNode *trailing = NULL;
! // be sure we have a trailing membar below the merge
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
  x = mm->fast_out(i);
  if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
  trailing = x->as_MemBar();
  break;
*** 1982,2009 **** } // trailing_to_card_mark // // graph traversal helper which detects extra, non-normal Mem feed ! // from a trailing membar to a preceding card mark volatile membar ! // i.e. it identifies whether one of the three possible extra GC ! // post-write Mem flow subgraphs is present // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. // ! // if the configurationis present returns the card mark membar // otherwise NULL MemBarNode *trailing_to_card_mark(const MemBarNode *trailing) { ! assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); - Node *x = trailing->in(TypeFunc::Memory); // the Mem feed to the membar should be a merge ! if (!x->is_MergeMem()) return NULL; MergeMemNode *mm = x->as_MergeMem(); x = mm->in(Compile::AliasIdxBot); // with G1 we may possibly see a Phi or two before we see a Memory --- 2317,2352 ---- } // trailing_to_card_mark // // graph traversal helper which detects extra, non-normal Mem feed ! // from a trailing volatile membar to a preceding card mark volatile ! // membar i.e. it identifies whether one of the three possible extra ! // GC post-write Mem flow subgraphs is present // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. // ! // if the configuration is present returns the card mark membar // otherwise NULL + // + // n.b. the supplied membar is expected to be a trailing + // MemBarVolatile i.e. the caller must ensure the input node has the + // correct opcode MemBarNode *trailing_to_card_mark(const MemBarNode *trailing) { ! assert(trailing->Opcode() == Op_MemBarVolatile, ! "expecting a volatile membar"); ! assert(!is_card_mark_membar(trailing), ! "not expecting a card mark membar"); // the Mem feed to the membar should be a merge ! Node *x = trailing->in(TypeFunc::Memory); ! 
if (!x->is_MergeMem()) { return NULL; + } MergeMemNode *mm = x->as_MergeMem(); x = mm->in(Compile::AliasIdxBot); // with G1 we may possibly see a Phi or two before we see a Memory
*** 2052,2076 ****
  return NULL;
  }
  }
  // the proj has to come from the card mark membar
  x = x->in(0);
! if (!x->is_MemBar())
  return NULL;
  MemBarNode *card_mark_membar = x->as_MemBar();
! if (!is_card_mark_membar(card_mark_membar))
  return NULL;
  return card_mark_membar;
  }
  // trailing_to_leading
  //
  // graph traversal helper which checks the Mem flow up the graph
! // from a (non-card mark) volatile membar attempting to locate and
  // return an associated leading membar. it first looks for a
  // subgraph in the normal configuration (relying on helper
  // normal_to_leading). failing that it then looks for one of the
  // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
--- 2395,2421 ----
  return NULL;
  }
  }
  // the proj has to come from the card mark membar
  x = x->in(0);
! if (!x->is_MemBar()) {
  return NULL;
+ }
  MemBarNode *card_mark_membar = x->as_MemBar();
! if (!is_card_mark_membar(card_mark_membar)) {
  return NULL;
+ }
  return card_mark_membar;
  }
  // trailing_to_leading
  //
  // graph traversal helper which checks the Mem flow up the graph
! // from a (non-card mark) trailing membar attempting to locate and
  // return an associated leading membar. it first looks for a
  // subgraph in the normal configuration (relying on helper
  // normal_to_leading). failing that it then looks for one of the
  // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
*** 2079,2116 **** // predicate normal_to_leading). // // if the configuration is valid returns the cpuorder member for // preference or when absent the release membar otherwise NULL. // ! // n.b. the input membar is expected to be a volatile membar but ! // must *not* be a card mark membar. MemBarNode *trailing_to_leading(const MemBarNode *trailing) { ! assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); MemBarNode *leading = normal_to_leading(trailing); ! if (leading) return leading; MemBarNode *card_mark_membar = trailing_to_card_mark(trailing); ! if (!card_mark_membar) return NULL; return normal_to_leading(card_mark_membar); } // predicates controlling emit of ldr<x>/ldar<x> and associated dmb bool unnecessary_acquire(const Node *barrier) { ! // assert barrier->is_MemBar(); ! if (UseBarriersForVolatile) // we need to plant a dmb return false; // a volatile read derived from bytecode (or also from an inlined // SHA field read via LibraryCallKit::load_field_from_object) // manifests as a LoadX[mo_acquire] followed by an acquire membar // with a bogus read dependency on it's preceding load. so in those --- 2424,2476 ---- // predicate normal_to_leading). // // if the configuration is valid returns the cpuorder member for // preference or when absent the release membar otherwise NULL. // ! // n.b. the input membar is expected to be either a volatile or ! // acquire membar but in the former case must *not* be a card mark ! // membar. MemBarNode *trailing_to_leading(const MemBarNode *trailing) { ! assert((trailing->Opcode() == Op_MemBarAcquire || ! trailing->Opcode() == Op_MemBarVolatile), ! "expecting an acquire or volatile membar"); ! assert((trailing->Opcode() != Op_MemBarVolatile || ! !is_card_mark_membar(trailing)), ! "not expecting a card mark membar"); MemBarNode *leading = normal_to_leading(trailing); ! 
if (leading) { return leading; + } + + // nothing more to do if this is an acquire + if (trailing->Opcode() == Op_MemBarAcquire) { + return NULL; + } MemBarNode *card_mark_membar = trailing_to_card_mark(trailing); ! if (!card_mark_membar) { return NULL; + } return normal_to_leading(card_mark_membar); } // predicates controlling emit of ldr<x>/ldar<x> and associated dmb bool unnecessary_acquire(const Node *barrier) { ! assert(barrier->is_MemBar(), "expecting a membar"); ! ! if (UseBarriersForVolatile) { // we need to plant a dmb return false; + } // a volatile read derived from bytecode (or also from an inlined // SHA field read via LibraryCallKit::load_field_from_object) // manifests as a LoadX[mo_acquire] followed by an acquire membar // with a bogus read dependency on it's preceding load. so in those
*** 2138,2149 **** // |Parms // MemBarAcquire* // // where * tags node we were passed // and |k means input k ! if (x->is_DecodeNarrowPtr()) x = x->in(1); return (x->is_Load() && x->as_Load()->is_acquire()); } // now check for an unsafe volatile get --- 2498,2510 ---- // |Parms // MemBarAcquire* // // where * tags node we were passed // and |k means input k ! if (x->is_DecodeNarrowPtr()) { x = x->in(1); + } return (x->is_Load() && x->as_Load()->is_acquire()); } // now check for an unsafe volatile get
*** 2165,2176 **** MemBarNode *parent = parent_membar(barrier); if (!parent || parent->Opcode() != Op_MemBarCPUOrder) return false; ctl = parent->proj_out(TypeFunc::Control); mem = parent->proj_out(TypeFunc::Memory); ! if (!ctl || !mem) return false; // ensure the proj nodes both feed a LoadX[mo_acquire] LoadNode *ld = NULL; for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { x = ctl->fast_out(i); // if we see a load we keep hold of it and stop searching --- 2526,2538 ---- MemBarNode *parent = parent_membar(barrier); if (!parent || parent->Opcode() != Op_MemBarCPUOrder) return false; ctl = parent->proj_out(TypeFunc::Control); mem = parent->proj_out(TypeFunc::Memory); ! if (!ctl || !mem) { return false; + } // ensure the proj nodes both feed a LoadX[mo_acquire] LoadNode *ld = NULL; for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { x = ctl->fast_out(i); // if we see a load we keep hold of it and stop searching
*** 2178,2219 ****
  ld = x->as_Load();
  break;
  }
  }
  // it must be an acquiring load
! if (! ld || ! ld->is_acquire())
! return false;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
  x = mem->fast_out(i);
  // if we see the same load we drop it and stop searching
  if (x == ld) {
  ld = NULL;
  break;
  }
  }
  // we must have dropped the load
! if (ld)
! return false;
  // check for a child cpuorder membar
  MemBarNode *child = child_membar(barrier->as_MemBar());
! if (!child || child->Opcode() != Op_MemBarCPUOrder)
! return false;
! return true;
  }
  bool needs_acquiring_load(const Node *n)
  {
! // assert n->is_Load();
! if (UseBarriersForVolatile)
  // we use a normal load and a dmb
  return false;
  LoadNode *ld = n->as_Load();
! if (!ld->is_acquire())
  return false;
  // check if this load is feeding an acquire membar
  //
  // LoadX[mo_acquire]
  // { |1 }
--- 2540,2589 ----
  ld = x->as_Load();
  break;
  }
  }
  // it must be an acquiring load
! if (ld && ld->is_acquire()) {
! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
  x = mem->fast_out(i);
  // if we see the same load we drop it and stop searching
  if (x == ld) {
  ld = NULL;
  break;
  }
  }
  // we must have dropped the load
! if (ld == NULL) {
  // check for a child cpuorder membar
  MemBarNode *child = child_membar(barrier->as_MemBar());
! if (child && child->Opcode() != Op_MemBarCPUOrder)
  return true;
+ }
+ }
+
+ // final option for unnecessary membar is that it is a trailing node
+ // belonging to a CAS
+
+ MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
+
+ return leading != NULL;
  }
  bool needs_acquiring_load(const Node *n)
  {
! assert(n->is_Load(), "expecting a load");
! if (UseBarriersForVolatile) {
  // we use a normal load and a dmb
  return false;
+ }
  LoadNode *ld = n->as_Load();
! if (!ld->is_acquire()) {
  return false;
+ }
  // check if this load is feeding an acquire membar
  //
  // LoadX[mo_acquire]
  // { |1 }
*** 2259,2295 **** MemBarNode *membar; membar = parent_membar(ld); ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) return false; // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain membar = child_membar(membar); ! if (!membar || !membar->Opcode() == Op_MemBarAcquire) return false; membar = child_membar(membar); ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) return false; return true; } bool unnecessary_release(const Node *n) { assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ! if (UseBarriersForVolatile) // we need to plant a dmb return false; // if there is a dependent CPUOrder barrier then use that as the // leading MemBarNode *barrier = n->as_MemBar(); --- 2629,2669 ---- MemBarNode *membar; membar = parent_membar(ld); ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; + } // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain membar = child_membar(membar); ! if (!membar || !membar->Opcode() == Op_MemBarAcquire) { return false; + } membar = child_membar(membar); ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; + } return true; } bool unnecessary_release(const Node *n) { assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ! if (UseBarriersForVolatile) { // we need to plant a dmb return false; + } // if there is a dependent CPUOrder barrier then use that as the // leading MemBarNode *barrier = n->as_MemBar();
*** 2301,2336 ****
  }
  // must start with a normal feed
  MemBarNode *child_barrier = leading_to_normal(barrier);
! if (!child_barrier)
  return false;
! if (!is_card_mark_membar(child_barrier))
  // this is the trailing membar and we are done
  return true;
  // must be sure this card mark feeds a trailing membar
  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
  return (trailing != NULL);
  }
  bool unnecessary_volatile(const Node *n)
  {
  // assert n->is_MemBar();
! if (UseBarriersForVolatile)
  // we need to plant a dmb
  return false;
  MemBarNode *mbvol = n->as_MemBar();
  // first we check if this is part of a card mark. if so then we have
  // to generate a StoreLoad barrier
! if (is_card_mark_membar(mbvol))
  return false;
  // ok, if it's not a card mark then we still need to check if it is
  // a trailing membar of a volatile put graph.
  return (trailing_to_leading(mbvol) != NULL);
--- 2675,2714 ----
  }
  // must start with a normal feed
  MemBarNode *child_barrier = leading_to_normal(barrier);
! if (!child_barrier) {
  return false;
+ }
! if (!is_card_mark_membar(child_barrier)) {
  // this is the trailing membar and we are done
  return true;
+ }
  // must be sure this card mark feeds a trailing membar
  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
  return (trailing != NULL);
  }
  bool unnecessary_volatile(const Node *n)
  {
  // assert n->is_MemBar();
! if (UseBarriersForVolatile) {
  // we need to plant a dmb
  return false;
+ }
  MemBarNode *mbvol = n->as_MemBar();
  // first we check if this is part of a card mark. if so then we have
  // to generate a StoreLoad barrier
! if (is_card_mark_membar(mbvol)) {
  return false;
+ }
  // ok, if it's not a card mark then we still need to check if it is
  // a trailing membar of a volatile put graph.
  return (trailing_to_leading(mbvol) != NULL);
*** 2339,2397 **** // predicates controlling emit of str<x>/stlr<x> and associated dmbs bool needs_releasing_store(const Node *n) { // assert n->is_Store(); ! if (UseBarriersForVolatile) // we use a normal store and dmb combination return false; StoreNode *st = n->as_Store(); // the store must be marked as releasing ! if (!st->is_release()) return false; // the store must be fed by a membar Node *x = st->lookup(StoreNode::Memory); ! if (! x || !x->is_Proj()) return false; ProjNode *proj = x->as_Proj(); x = proj->lookup(0); ! if (!x || !x->is_MemBar()) return false; MemBarNode *barrier = x->as_MemBar(); // if the barrier is a release membar or a cpuorder mmebar fed by a // release membar then we need to check whether that forms part of a // volatile put graph. // reject invalid candidates ! if (!leading_membar(barrier)) return false; // does this lead a normal subgraph? MemBarNode *mbvol = leading_to_normal(barrier); ! if (!mbvol) return false; // all done unless this is a card mark ! if (!is_card_mark_membar(mbvol)) return true; // we found a card mark -- just make sure we have a trailing barrier return (card_mark_to_trailing(mbvol) != NULL); } // predicate controlling translation of StoreCM // // returns true if a StoreStore must precede the card write otherwise // false --- 2717,2831 ---- // predicates controlling emit of str<x>/stlr<x> and associated dmbs bool needs_releasing_store(const Node *n) { // assert n->is_Store(); ! if (UseBarriersForVolatile) { // we use a normal store and dmb combination return false; + } StoreNode *st = n->as_Store(); // the store must be marked as releasing ! if (!st->is_release()) { return false; + } // the store must be fed by a membar Node *x = st->lookup(StoreNode::Memory); ! if (! x || !x->is_Proj()) { return false; + } ProjNode *proj = x->as_Proj(); x = proj->lookup(0); ! 
if (!x || !x->is_MemBar()) { return false; + } MemBarNode *barrier = x->as_MemBar(); // if the barrier is a release membar or a cpuorder mmebar fed by a // release membar then we need to check whether that forms part of a // volatile put graph. // reject invalid candidates ! if (!leading_membar(barrier)) { return false; + } // does this lead a normal subgraph? MemBarNode *mbvol = leading_to_normal(barrier); ! if (!mbvol) { return false; + } // all done unless this is a card mark ! if (!is_card_mark_membar(mbvol)) { return true; + } // we found a card mark -- just make sure we have a trailing barrier return (card_mark_to_trailing(mbvol) != NULL); } + // predicate controlling translation of CAS + // + // returns true if CAS needs to use an acquiring load otherwise false + + bool needs_acquiring_load_exclusive(const Node *n) + { + assert(is_CAS(n->Opcode()), "expecting a compare and swap"); + if (UseBarriersForVolatile) { + return false; + } + + // CAS nodes only ought to turn up in inlined unsafe CAS operations + #ifdef ASSERT + LoadStoreNode *st = n->as_LoadStore(); + + // the store must be fed by a membar + + Node *x = st->lookup(StoreNode::Memory); + + assert (x && x->is_Proj(), "CAS not fed by memory proj!"); + + ProjNode *proj = x->as_Proj(); + + x = proj->lookup(0); + + assert (x && x->is_MemBar(), "CAS not fed by membar!"); + + MemBarNode *barrier = x->as_MemBar(); + + // the barrier must be a cpuorder mmebar fed by a release membar + + assert(barrier->Opcode() == Op_MemBarCPUOrder, + "CAS not fed by cpuorder membar!"); + + MemBarNode *b = parent_membar(barrier); + assert ((b != NULL && b->Opcode() == Op_MemBarRelease), + "CAS not fed by cpuorder+release membar pair!"); + + // does this lead a normal subgraph? 
+ MemBarNode *mbar = leading_to_normal(barrier); + + assert(mbar != NULL, "CAS not embedded in normal graph!"); + + assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire"); + #endif // ASSERT + // so we can just return true here + return true; + } + // predicate controlling translation of StoreCM // // returns true if a StoreStore must precede the card write otherwise // false
*** 2401,2418 ****
  // we only ever need to generate a dmb ishst between an object put
  // and the associated card mark when we are using CMS without
  // conditional card marking
! if (!UseConcMarkSweepGC || UseCondCardMark)
  return true;
  // if we are implementing volatile puts using barriers then the
  // object put as an str so we must insert the dmb ishst
! if (UseBarriersForVolatile)
  return false;
  // we can omit the dmb ishst if this StoreCM is part of a volatile
  // put because in that case the put will be implemented by stlr
  //
  // we need to check for a normal subgraph feeding this StoreCM.
--- 2835,2854 ----
  // we only ever need to generate a dmb ishst between an object put
  // and the associated card mark when we are using CMS without
  // conditional card marking
! if (!UseConcMarkSweepGC || UseCondCardMark) {
  return true;
+ }
  // if we are implementing volatile puts using barriers then the
  // object put as an str so we must insert the dmb ishst
! if (UseBarriersForVolatile) {
  return false;
+ }
  // we can omit the dmb ishst if this StoreCM is part of a volatile
  // put because in that case the put will be implemented by stlr
  //
  // we need to check for a normal subgraph feeding this StoreCM.
*** 2420,2442 **** // either a MemBarRelease or its dependent MemBarCPUOrder, and the // leading membar must be part of a normal subgraph Node *x = storecm->in(StoreNode::Memory); ! if (!x->is_Proj()) return false; x = x->in(0); ! if (!x->is_MemBar()) return false; MemBarNode *leading = x->as_MemBar(); // reject invalid candidates ! if (!leading_membar(leading)) return false; // we can omit the StoreStore if it is the head of a normal subgraph return (leading_to_normal(leading) != NULL); } --- 2856,2881 ---- // either a MemBarRelease or its dependent MemBarCPUOrder, and the // leading membar must be part of a normal subgraph Node *x = storecm->in(StoreNode::Memory); ! if (!x->is_Proj()) { return false; + } x = x->in(0); ! if (!x->is_MemBar()) { return false; + } MemBarNode *leading = x->as_MemBar(); // reject invalid candidates ! if (!leading_membar(leading)) { return false; + } // we can omit the StoreStore if it is the head of a normal subgraph return (leading_to_normal(leading) != NULL); }
*** 8363,8375 **** --- 8802,8818 ---- %} // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher // can't match them + // standard CompareAndSwapX when we are using barriers + // these have higher priority than the rules selected by a predicate + instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); format %{ "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
*** 8383,8392 **** --- 8826,8836 ---- %} instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
*** 8400,8409 **** --- 8844,8854 ---- %} instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
*** 8417,8426 **** --- 8862,8872 ---- %} instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); format %{ "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
*** 8431,8440 **** --- 8877,8964 ---- aarch64_enc_cset_eq(res)); ins_pipe(pipe_slow); %} + // alternative CompareAndSwapX when we are eliding barriers + + instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); + %} + + instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); + %} + + instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 
1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); + %} + + instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); + %} + instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{ match(Set prev (GetAndSetI mem newv)); format %{ "atomic_xchgw $prev, $newv, [$mem]" %} ins_encode %{
< prev index next >