< prev index next >
src/cpu/aarch64/vm/aarch64.ad
Print this page
rev 8967 : 8080293: AARCH64: Remove unnecessary dmbs from generated CAS code
Summary: The current encoding for CAS generates unnecessary leading and trailing dmbs for the MemBarAcquire and MemBarRelease which ought to be elided
Reviewed-by: kvn
*** 1037,1046 ****
--- 1037,1047 ----
MemBarNode *parent_membar(const Node *n);
MemBarNode *child_membar(const MemBarNode *n);
bool leading_membar(const MemBarNode *barrier);
bool is_card_mark_membar(const MemBarNode *barrier);
+ bool is_CAS(int opcode);
MemBarNode *leading_to_normal(MemBarNode *leading);
MemBarNode *normal_to_leading(const MemBarNode *barrier);
MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
*** 1055,1064 ****
--- 1056,1068 ----
bool unnecessary_release(const Node *barrier);
bool unnecessary_volatile(const Node *barrier);
bool needs_releasing_store(const Node *store);
+ // predicate controlling translation of CompareAndSwapX
+ bool needs_acquiring_load_exclusive(const Node *load);
+
// predicate controlling translation of StoreCM
bool unnecessary_storestore(const Node *storecm);
%}
source %{
*** 1086,1104 ****
//
// dmb ish
// str<x>
// dmb ish
//
// In order to generate the desired instruction sequence we need to
// be able to identify specific 'signature' ideal graph node
// sequences which i) occur as a translation of a volatile reads or
! // writes and ii) do not occur through any other translation or
! // graph transformation. We can then provide alternative aldc
! // matching rules which translate these node sequences to the
! // desired machine code sequences. Selection of the alternative
! // rules can be implemented by predicates which identify the
! // relevant node sequences.
//
// The ideal graph generator translates a volatile read to the node
// sequence
//
// LoadX[mo_acquire]
--- 1090,1151 ----
//
// dmb ish
// str<x>
// dmb ish
//
+ // We can also use ldaxr and stlxr to implement compare and swap CAS
+ // sequences. These are normally translated to an instruction
+ // sequence like the following
+ //
+ // dmb ish
+ // retry:
+ // ldxr<x> rval raddr
+ // cmp rval rold
+ // b.ne done
+ // stlxr<x> rval, rnew, rold
+ // cbnz rval retry
+ // done:
+ // cset r0, eq
+ // dmb ishld
+ //
+ // Note that the exclusive store is already using an stlxr
+ // instruction. That is required to ensure visibility to other
+ // threads of the exclusive write (assuming it succeeds) before that
+ // of any subsequent writes.
+ //
+ // The following instruction sequence is an improvement on the above
+ //
+ // retry:
+ // ldaxr<x> rval raddr
+ // cmp rval rold
+ // b.ne done
+ // stlxr<x> rval, rnew, rold
+ // cbnz rval retry
+ // done:
+ // cset r0, eq
+ //
+ // We don't need the leading dmb ish since the stlxr guarantees
+ // visibility of prior writes in the case that the swap is
+ // successful. Crucially we don't have to worry about the case where
+ // the swap is not successful since no valid program should be
+ // relying on visibility of prior changes by the attempting thread
+ // in the case where the CAS fails.
+ //
+ // Similarly, we don't need the trailing dmb ishld if we substitute
+ // an ldaxr instruction since that will provide all the guarantees we
+ // require regarding observation of changes made by other threads
+ // before any change to the CAS address observed by the load.
+ //
// In order to generate the desired instruction sequence we need to
// be able to identify specific 'signature' ideal graph node
// sequences which i) occur as a translation of a volatile reads or
! // writes or CAS operations and ii) do not occur through any other
! // translation or graph transformation. We can then provide
! // alternative adlc matching rules which translate these node
! // sequences to the desired machine code sequences. Selection of the
! // alternative rules can be implemented by predicates which identify
! // the relevant node sequences.
//
// The ideal graph generator translates a volatile read to the node
// sequence
//
// LoadX[mo_acquire]
*** 1161,1170 ****
--- 1208,1226 ----
// sequence of membar nodes. Similarly, given an acquire membar we
// can know that it was added because of an inlined unsafe volatile
// get if it is fed and feeds a cpuorder membar and if its feed
// membar also feeds an acquiring load.
//
+ // Finally an inlined (Unsafe) CAS operation is translated to the
+ // following ideal graph
+ //
+ // MemBarRelease
+ // MemBarCPUOrder
+ // CompareAndSwapX {CardMark}-optional
+ // MemBarCPUOrder
+ // MemBarAcquire
+ //
// So, where we can identify these volatile read and write
// signatures we can choose to plant either of the above two code
// sequences. For a volatile read we can simply plant a normal
// ldr<x> and translate the MemBarAcquire to a dmb. However, we can
// also choose to inhibit translation of the MemBarAcquire and
*** 1175,1184 ****
--- 1231,1248 ----
// normal str<x> and then a dmb ish for the MemBarVolatile.
// Alternatively, we can inhibit translation of the MemBarRelease
// and MemBarVolatile and instead plant a simple stlr<x>
// instruction.
//
+ // when we recognise a CAS signature we can choose to plant a dmb
+ // ish as a translation for the MemBarRelease, the conventional
+ // macro-instruction sequence for the CompareAndSwap node (which
+ // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
+ // Alternatively, we can elide generation of the dmb instructions
+ // and plant the alternative CompareAndSwap macro-instruction
+ // sequence (which uses ldaxr<x>).
+ //
// Of course, the above only applies when we see these signature
// configurations. We still want to plant dmb instructions in any
// other cases where we may see a MemBarAcquire, MemBarRelease or
// MemBarVolatile. For example, at the end of a constructor which
// writes final/volatile fields we will see a MemBarRelease
*** 1192,1202 ****
// always just translate the loads and stores to ldr<x> and str<x>
// and translate acquire, release and volatile membars to the
// relevant dmb instructions.
//
! // graph traversal helpers used for volatile put/get optimization
// 1) general purpose helpers
// if node n is linked to a parent MemBarNode by an intervening
// Control and Memory ProjNode return the MemBarNode otherwise return
--- 1256,1267 ----
// always just translate the loads and stores to ldr<x> and str<x>
// and translate acquire, release and volatile membars to the
// relevant dmb instructions.
//
! // graph traversal helpers used for volatile put/get and CAS
! // optimization
// 1) general purpose helpers
// if node n is linked to a parent MemBarNode by an intervening
// Control and Memory ProjNode return the MemBarNode otherwise return
*** 1218,1237 ****
mem = n->lookup(TypeFunc::Memory);
} else {
return NULL;
}
! if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
return NULL;
membar = ctl->lookup(0);
! if (!membar || !membar->is_MemBar())
return NULL;
! if (mem->lookup(0) != membar)
return NULL;
return membar->as_MemBar();
}
// if n is linked to a child MemBarNode by intervening Control and
--- 1283,1305 ----
mem = n->lookup(TypeFunc::Memory);
} else {
return NULL;
}
! if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
return NULL;
+ }
membar = ctl->lookup(0);
! if (!membar || !membar->is_MemBar()) {
return NULL;
+ }
! if (mem->lookup(0) != membar) {
return NULL;
+ }
return membar->as_MemBar();
}
// if n is linked to a child MemBarNode by intervening Control and
*** 1257,1268 ****
child = x->as_MemBar();
break;
}
}
! if (child == NULL)
return NULL;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
// if we see a membar we keep hold of it. we may also see a new
// arena copy of the original but it will appear later
--- 1325,1337 ----
child = x->as_MemBar();
break;
}
}
! if (child == NULL) {
return NULL;
+ }
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
// if we see a membar we keep hold of it. we may also see a new
// arena copy of the original but it will appear later
*** 1281,1299 ****
bool leading_membar(const MemBarNode *barrier)
{
int opcode = barrier->Opcode();
// if this is a release membar we are ok
! if (opcode == Op_MemBarRelease)
return true;
// if its a cpuorder membar . . .
! if (opcode != Op_MemBarCPUOrder)
return false;
// then the parent has to be a release membar
MemBarNode *parent = parent_membar(barrier);
! if (!parent)
return false;
opcode = parent->Opcode();
return opcode == Op_MemBarRelease;
}
// 2) card mark detection helper
--- 1350,1371 ----
bool leading_membar(const MemBarNode *barrier)
{
int opcode = barrier->Opcode();
// if this is a release membar we are ok
! if (opcode == Op_MemBarRelease) {
return true;
+ }
// if its a cpuorder membar . . .
! if (opcode != Op_MemBarCPUOrder) {
return false;
+ }
// then the parent has to be a release membar
MemBarNode *parent = parent_membar(barrier);
! if (!parent) {
return false;
+ }
opcode = parent->Opcode();
return opcode == Op_MemBarRelease;
}
// 2) card mark detection helper
*** 1312,1326 ****
//
// iii) the node's Mem projection feeds a StoreCM node.
bool is_card_mark_membar(const MemBarNode *barrier)
{
! if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark))
return false;
! if (barrier->Opcode() != Op_MemBarVolatile)
return false;
ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
Node *y = mem->fast_out(i);
--- 1384,1400 ----
//
// iii) the node's Mem projection feeds a StoreCM node.
bool is_card_mark_membar(const MemBarNode *barrier)
{
! if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
return false;
+ }
! if (barrier->Opcode() != Op_MemBarVolatile) {
return false;
+ }
ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
Node *y = mem->fast_out(i);
*** 1331,1342 ****
return false;
}
! // 3) helper predicates to traverse volatile put graphs which may
! // contain GC barrier subgraphs
// Preamble
// --------
//
// for volatile writes we can omit generating barriers and employ a
--- 1405,1416 ----
return false;
}
! // 3) helper predicates to traverse volatile put or CAS graphs which
! // may contain GC barrier subgraphs
// Preamble
// --------
//
// for volatile writes we can omit generating barriers and employ a
*** 1402,1413 ****
//
// It is also possible to perform the card mark conditionally on it
// currently being unmarked in which case the volatile put graph
// will look slightly different
//
! // MemBarRelease
! // MemBarCPUOrder___________________________________________
// || \\ Ctl \ Ctl \ \\ Mem \
// || StoreN/P[mo_release] CastP2X If LoadB |
// | \ / \ |
// | MergeMem . . . StoreB
// | / /
--- 1476,1486 ----
//
// It is also possible to perform the card mark conditionally on it
// currently being unmarked in which case the volatile put graph
// will look slightly different
//
! // MemBarRelease____________________________________________
// || \\ Ctl \ Ctl \ \\ Mem \
// || StoreN/P[mo_release] CastP2X If LoadB |
// | \ / \ |
// | MergeMem . . . StoreB
// | / /
*** 1417,1427 ****
// It is worth noting at this stage that both the above
// configurations can be uniquely identified by checking that the
// memory flow includes the following subgraph:
//
// MemBarRelease
! // MemBarCPUOrder
// | \ . . .
// | StoreX[mo_release] . . .
// | /
// MergeMem
// |
--- 1490,1500 ----
// It is worth noting at this stage that both the above
// configurations can be uniquely identified by checking that the
// memory flow includes the following subgraph:
//
// MemBarRelease
! // {MemBarCPUOrder}
// | \ . . .
// | StoreX[mo_release] . . .
// | /
// MergeMem
// |
*** 1429,1440 ****
//
// This is referred to as a *normal* subgraph. It can easily be
// detected starting from any candidate MemBarRelease,
// StoreX[mo_release] or MemBarVolatile.
//
// the code below uses two helper predicates, leading_to_normal and
! // normal_to_leading to identify this configuration, one validating
// the layout starting from the top membar and searching down and
// the other validating the layout starting from the lower membar
// and searching up.
//
// There are two special case GC configurations when a normal graph
--- 1502,1553 ----
//
// This is referred to as a *normal* subgraph. It can easily be
// detected starting from any candidate MemBarRelease,
// StoreX[mo_release] or MemBarVolatile.
//
+ // A simple variation on this normal case occurs for an unsafe CAS
+ // operation. The basic graph for a non-object CAS is
+ //
+ // MemBarRelease
+ // ||
+ // MemBarCPUOrder
+ // || \\ . . .
+ // || CompareAndSwapX
+ // || |
+ // || SCMemProj
+ // | \ /
+ // | MergeMem
+ // | /
+ // MemBarCPUOrder
+ // ||
+ // MemBarAcquire
+ //
+ // The same basic variations on this arrangement (mutatis mutandis)
+ // occur when a card mark is introduced. i.e. we see the same basic
+ // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
+ // tail of the graph is a pair comprising a MemBarCPUOrder +
+ // MemBarAcquire.
+ //
+ // So, in the case of a CAS the normal graph has the variant form
+ //
+ // MemBarRelease
+ // MemBarCPUOrder
+ // | \ . . .
+ // | CompareAndSwapX . . .
+ // | |
+ // | SCMemProj
+ // | / . . .
+ // MergeMem
+ // |
+ // MemBarCPUOrder
+ // MemBarAcquire
+ //
+ // This graph can also easily be detected starting from any
+ // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
+ //
// the code below uses two helper predicates, leading_to_normal and
! // normal_to_leading to identify these normal graphs, one validating
// the layout starting from the top membar and searching down and
// the other validating the layout starting from the lower membar
// and searching up.
//
// There are two special case GC configurations when a normal graph
*** 1448,1458 ****
// employs a post-write GC barrier while G1 employs both a pre- and
// post-write GC barrier. Of course the extra nodes may be absent --
// they are only inserted for object puts. This significantly
// complicates the task of identifying whether a MemBarRelease,
// StoreX[mo_release] or MemBarVolatile forms part of a volatile put
! // when using these GC configurations (see below).
//
// In both cases the post-write subtree includes an auxiliary
// MemBarVolatile (StoreLoad barrier) separating the object put and
// the read of the corresponding card. This poses two additional
// problems.
--- 1561,1573 ----
// employs a post-write GC barrier while G1 employs both a pre- and
// post-write GC barrier. Of course the extra nodes may be absent --
// they are only inserted for object puts. This significantly
// complicates the task of identifying whether a MemBarRelease,
// StoreX[mo_release] or MemBarVolatile forms part of a volatile put
! // when using these GC configurations (see below). It adds similar
! // complexity to the task of identifying whether a MemBarRelease,
! // CompareAndSwapX or MemBarAcquire forms part of a CAS.
//
// In both cases the post-write subtree includes an auxiliary
// MemBarVolatile (StoreLoad barrier) separating the object put and
// the read of the corresponding card. This poses two additional
// problems.
*** 1487,1497 ****
// A CMS GC post-barrier wraps its card write (StoreCM) inside an If
// which selects conditonal execution based on the value loaded
// (LoadB) from the card. Ctl and Mem are fed to the If via an
// intervening StoreLoad barrier (MemBarVolatile).
//
! // So, with CMS we may see a node graph which looks like this
//
// MemBarRelease
// MemBarCPUOrder_(leading)__________________
// C | M \ \\ C \
// | \ StoreN/P[mo_release] CastP2X
--- 1602,1613 ----
// A CMS GC post-barrier wraps its card write (StoreCM) inside an If
// which selects conditonal execution based on the value loaded
// (LoadB) from the card. Ctl and Mem are fed to the If via an
// intervening StoreLoad barrier (MemBarVolatile).
//
! // So, with CMS we may see a node graph for a volatile object store
! // which looks like this
//
// MemBarRelease
// MemBarCPUOrder_(leading)__________________
// C | M \ \\ C \
// | \ StoreN/P[mo_release] CastP2X
*** 1522,1531 ****
--- 1638,1696 ----
// card mark membar. The trailing MergeMem merges the AliasIdxBot
// Mem slice from the card mark membar and the AliasIdxRaw slice
// from the StoreCM into the trailing membar (n.b. the latter
// proceeds via a Phi associated with the If region).
//
+ // The graph for a CAS varies slightly, the obvious difference being
+ // that the StoreN/P node is replaced by a CompareAndSwapP/N node
+ // and the trailing MemBarVolatile by a MemBarCPUOrder +
+ // MemBarAcquire pair. The other important difference is that the
+ // CompareAndSwap node's SCMemProj is not merged into the card mark
+ // membar - it still feeds the trailing MergeMem. This also means
+ // that the card mark membar receives its Mem feed directly from the
+ // leading membar rather than via a MergeMem.
+ //
+ // MemBarRelease
+ // MemBarCPUOrder__(leading)_________________________
+ // || \\ C \
+ // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X
+ // C | || M | |
+ // | LoadB | ______/|
+ // | | | / |
+ // | Cmp | / SCMemProj
+ // | / | / |
+ // If | / /
+ // | \ | / /
+ // IfFalse IfTrue | / /
+ // \ / \ |/ prec /
+ // \ / StoreCM /
+ // \ / | /
+ // Region . . . /
+ // | \ /
+ // | . . . \ / Bot
+ // | MergeMem
+ // | |
+ // MemBarCPUOrder
+ // MemBarAcquire (trailing)
+ //
+ // This has a slightly different memory subgraph to the one seen
+ // previously but the core of it is the same as for the CAS normal
+ // subgraph
+ //
+ // MemBarRelease
+ // MemBarCPUOrder____
+ // || \ . . .
+ // MemBarVolatile CompareAndSwapX . . .
+ // | \ |
+ // . . . SCMemProj
+ // | / . . .
+ // MergeMem
+ // |
+ // MemBarCPUOrder
+ // MemBarAcquire
+ //
+ //
// G1 is quite a lot more complicated. The nodes inserted on behalf
// of G1 may comprise: a pre-write graph which adds the old value to
// the SATB queue; the releasing store itself; and, finally, a
// post-write graph which performs a card mark.
//
*** 1573,1588 ****
// MemBarVolatile (trailing)
//
// n.b. the LoadB in this subgraph is not the card read -- it's a
// read of the SATB queue active flag.
//
// The G1 post-write subtree is also optional, this time when the
// new value being written is either null or can be identified as a
// newly allocated (young gen) object with no intervening control
// flow. The latter cannot happen but the former may, in which case
! // the card mark membar is omitted and the memory feeds from the
! // leading membar and the StoreN/P are merged direct into the
// trailing membar as per the normal subgraph. So, the only special
// case which arises is when the post-write subgraph is generated.
//
// The kernel of the post-write G1 subgraph is the card mark itself
// which includes a card mark memory barrier (MemBarVolatile), a
--- 1738,1757 ----
// MemBarVolatile (trailing)
//
// n.b. the LoadB in this subgraph is not the card read -- it's a
// read of the SATB queue active flag.
//
+ // Once again the CAS graph is a minor variant on the above with the
+ // expected substitutions of CompareAndSwapX for StoreN/P and
+ // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
+ //
// The G1 post-write subtree is also optional, this time when the
// new value being written is either null or can be identified as a
// newly allocated (young gen) object with no intervening control
// flow. The latter cannot happen but the former may, in which case
! // the card mark membar is omitted and the memory feeds from the
! // leading membar and the StoreN/P are merged direct into the
// trailing membar as per the normal subgraph. So, the only special
// case which arises is when the post-write subgraph is generated.
//
// The kernel of the post-write G1 subgraph is the card mark itself
// which includes a card mark memory barrier (MemBarVolatile), a
*** 1666,1791 ****
// membar. Each Phi corresponds to one of the Ifs which may skip
// around the card mark membar. So when the If implementing the NULL
// value check has been elided the total number of Phis is 2
// otherwise it is 3.
//
// So, the upshot is that in all cases the volatile put graph will
// include a *normal* memory subgraph betwen the leading membar and
! // its child membar. When that child is not a card mark membar then
! // it marks the end of a volatile put subgraph. If the child is a
! // card mark membar then the normal subgraph will form part of a
! // volatile put subgraph if and only if the child feeds an
! // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That
! // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging
! // the leading barrier memory flow (for G1).
//
// The predicates controlling generation of instructions for store
// and barrier nodes employ a few simple helper functions (described
! // below) which identify the presence or absence of these subgraph
! // configurations and provide a means of traversing from one node in
! // the subgraph to another.
// leading_to_normal
//
! //graph traversal helper which detects the normal case Mem feed
! // from a release membar (or, optionally, its cpuorder child) to a
! // dependent volatile membar i.e. it ensures that the following Mem
! // flow subgraph is present.
//
// MemBarRelease
! // MemBarCPUOrder
// | \ . . .
// | StoreN/P[mo_release] . . .
// | /
// MergeMem
// |
! // MemBarVolatile
//
! // if the correct configuration is present returns the volatile
// membar otherwise NULL.
//
// the input membar is expected to be either a cpuorder membar or a
// release membar. in the latter case it should not have a cpu membar
// child.
//
! // the returned membar may be a card mark membar rather than a
! // trailing membar.
MemBarNode *leading_to_normal(MemBarNode *leading)
{
assert((leading->Opcode() == Op_MemBarRelease ||
leading->Opcode() == Op_MemBarCPUOrder),
"expecting a volatile or cpuroder membar!");
// check the mem flow
ProjNode *mem = leading->proj_out(TypeFunc::Memory);
! if (!mem)
return NULL;
Node *x = NULL;
StoreNode * st = NULL;
MergeMemNode *mm = NULL;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
if (x->is_MergeMem()) {
! if (mm != NULL)
return NULL;
// two merge mems is one too many
mm = x->as_MergeMem();
} else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
! // two releasing stores is one too many
! if (st != NULL)
return NULL;
st = x->as_Store();
}
}
! if (!mm || !st)
return NULL;
! bool found = false;
! // ensure the store feeds the merge
for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
if (st->fast_out(i) == mm) {
! found = true;
break;
}
}
!
! if (!found)
return NULL;
! MemBarNode *mbvol = NULL;
! // ensure the merge feeds a volatile membar
for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
x = mm->fast_out(i);
! if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
! mbvol = x->as_MemBar();
break;
}
}
! return mbvol;
}
// normal_to_leading
//
// graph traversal helper which detects the normal case Mem feed
// from either a card mark or a trailing membar to a preceding
// release membar (optionally its cpuorder child) i.e. it ensures
! // that the following Mem flow subgraph is present.
//
// MemBarRelease
// MemBarCPUOrder {leading}
// | \ . . .
// | StoreN/P[mo_release] . . .
// | /
// MergeMem
// |
! // MemBarVolatile
//
// this predicate checks for the same flow as the previous predicate
// but starting from the bottom rather than the top.
//
// if the configuration is present returns the cpuorder member for
--- 1835,2058 ----
// membar. Each Phi corresponds to one of the Ifs which may skip
// around the card mark membar. So when the If implementing the NULL
// value check has been elided the total number of Phis is 2
// otherwise it is 3.
//
+ // The CAS graph when using G1GC also includes a pre-write subgraph
+ // and an optional post-write subgraph. The same variations are
+ // introduced as for CMS with conditional card marking i.e. the
+ // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
+ // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
+ // Mem feed from the CompareAndSwapP/N includes a precedence
+ // dependency feed to the StoreCM and a feed via an SCMemProj to the
+ // trailing membar. So, as before the configuration includes the
+ // normal CAS graph as a subgraph of the memory flow.
+ //
// So, the upshot is that in all cases the volatile put graph will
// include a *normal* memory subgraph betwen the leading membar and
! // its child membar, either a volatile put graph (including a
! // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
! // When that child is not a card mark membar then it marks the end
! // of the volatile put or CAS subgraph. If the child is a card mark
! // membar then the normal subgraph will form part of a volatile put
! // subgraph if and only if the child feeds an AliasIdxBot Mem feed
! // to a trailing barrier via a MergeMem. That feed is either direct
! // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
! // memory flow (for G1).
//
// The predicates controlling generation of instructions for store
// and barrier nodes employ a few simple helper functions (described
! // below) which identify the presence or absence of all these
! // subgraph configurations and provide a means of traversing from
! // one node in the subgraph to another.
!
! // is_CAS(int opcode)
! //
! // return true if opcode is one of the possible CompareAndSwapX
! // values otherwise false.
!
! bool is_CAS(int opcode)
! {
! return (opcode == Op_CompareAndSwapI ||
! opcode == Op_CompareAndSwapL ||
! opcode == Op_CompareAndSwapN ||
! opcode == Op_CompareAndSwapP);
! }
// leading_to_normal
//
! //graph traversal helper which detects the normal case Mem feed from
! // a release membar (or, optionally, its cpuorder child) to a
! // dependent volatile membar i.e. it ensures that one or other of
! // the following Mem flow subgraph is present.
//
// MemBarRelease
! // MemBarCPUOrder {leading}
// | \ . . .
// | StoreN/P[mo_release] . . .
// | /
// MergeMem
// |
! // MemBarVolatile {trailing or card mark}
! //
! // MemBarRelease
! // MemBarCPUOrder {leading}
! // | \ . . .
! // | CompareAndSwapX . . .
! // |
! // . . . SCMemProj
! // \ |
! // | MergeMem
! // | /
! // MemBarCPUOrder
! // MemBarAcquire {trailing}
//
! // if the correct configuration is present returns the trailing
// membar otherwise NULL.
//
// the input membar is expected to be either a cpuorder membar or a
// release membar. in the latter case it should not have a cpu membar
// child.
//
! // the returned value may be a card mark or trailing membar
! //
MemBarNode *leading_to_normal(MemBarNode *leading)
{
assert((leading->Opcode() == Op_MemBarRelease ||
leading->Opcode() == Op_MemBarCPUOrder),
"expecting a volatile or cpuroder membar!");
// check the mem flow
ProjNode *mem = leading->proj_out(TypeFunc::Memory);
! if (!mem) {
return NULL;
+ }
Node *x = NULL;
StoreNode * st = NULL;
+ LoadStoreNode *cas = NULL;
MergeMemNode *mm = NULL;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
if (x->is_MergeMem()) {
! if (mm != NULL) {
return NULL;
+ }
// two merge mems is one too many
mm = x->as_MergeMem();
} else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
! // two releasing stores/CAS nodes is one too many
! if (st != NULL || cas != NULL) {
return NULL;
+ }
st = x->as_Store();
+ } else if (is_CAS(x->Opcode())) {
+ if (st != NULL || cas != NULL) {
+ return NULL;
+ }
+ cas = x->as_LoadStore();
}
}
! // must have a store or a cas
! if (!st && !cas) {
return NULL;
+ }
! // must have a merge if we also have st
! if (st && !mm) {
! return NULL;
! }
!
! Node *y = NULL;
! if (cas) {
! // look for an SCMemProj
! for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
! x = cas->fast_out(i);
! if (x->is_Proj()) {
! y = x;
! break;
! }
! }
! if (y == NULL) {
! return NULL;
! }
! // the proj must feed a MergeMem
! for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
! x = y->fast_out(i);
! if (x->is_MergeMem()) {
! mm = x->as_MergeMem();
! break;
! }
! }
! if (mm == NULL)
! return NULL;
! } else {
! // ensure the store feeds the existing mergemem;
for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
if (st->fast_out(i) == mm) {
! y = st;
break;
}
}
! if (y == NULL) {
return NULL;
+ }
+ }
! MemBarNode *mbar = NULL;
! // ensure the merge feeds to the expected type of membar
for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
x = mm->fast_out(i);
! if (x->is_MemBar()) {
! int opcode = x->Opcode();
! if (opcode == Op_MemBarVolatile && st) {
! mbar = x->as_MemBar();
! } else if (cas && opcode == Op_MemBarCPUOrder) {
! MemBarNode *y = x->as_MemBar();
! y = child_membar(y);
! if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
! mbar = y;
! }
! }
break;
}
}
! return mbar;
}
// normal_to_leading
//
// graph traversal helper which detects the normal case Mem feed
// from either a card mark or a trailing membar to a preceding
// release membar (optionally its cpuorder child) i.e. it ensures
! // that one or other of the following Mem flow subgraphs is present.
//
// MemBarRelease
// MemBarCPUOrder {leading}
// | \ . . .
// | StoreN/P[mo_release] . . .
// | /
// MergeMem
// |
! // MemBarVolatile {card mark or trailing}
! //
! // MemBarRelease
! // MemBarCPUOrder {leading}
! // | \ . . .
! // | CompareAndSwapX . . .
! // |
! // . . . SCMemProj
! // \ |
! // | MergeMem
! // | /
! // MemBarCPUOrder
! // MemBarAcquire {trailing}
//
// this predicate checks for the same flow as the previous predicate
// but starting from the bottom rather than the top.
//
// if the configuration is present returns the cpuorder member for
*** 1795,1850 ****
// need not be a card mark membar.
MemBarNode *normal_to_leading(const MemBarNode *barrier)
{
// input must be a volatile membar
! assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar");
Node *x;
// the Mem feed to the membar should be a merge
! x = barrier->in(TypeFunc::Memory);
if (!x->is_MergeMem())
return NULL;
MergeMemNode *mm = x->as_MergeMem();
! // the AliasIdxBot slice should be another MemBar projection
x = mm->in(Compile::AliasIdxBot);
// ensure this is a non control projection
! if (!x->is_Proj() || x->is_CFG())
return NULL;
// if it is fed by a membar that's the one we want
x = x->in(0);
! if (!x->is_MemBar())
return NULL;
MemBarNode *leading = x->as_MemBar();
// reject invalid candidates
! if (!leading_membar(leading))
return NULL;
! // ok, we have a leading ReleaseMembar, now for the sanity clauses
! // the leading membar must feed Mem to a releasing store
ProjNode *mem = leading->proj_out(TypeFunc::Memory);
StoreNode *st = NULL;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
st = x->as_Store();
! break;
}
}
! if (st == NULL)
return NULL;
! // the releasing store has to feed the same merge
for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
! if (st->fast_out(i) == mm)
return leading;
}
return NULL;
}
// card_mark_to_trailing
--- 2062,2182 ----
// need not be a card mark membar.
MemBarNode *normal_to_leading(const MemBarNode *barrier)
{
// input must be a volatile membar
! assert((barrier->Opcode() == Op_MemBarVolatile ||
! barrier->Opcode() == Op_MemBarAcquire),
! "expecting a volatile or an acquire membar");
Node *x;
+ bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
+
+ // if we have an acquire membar then it must be fed via a CPUOrder
+ // membar
+
+ if (is_cas) {
+ // skip to parent barrier which must be a cpuorder
+ x = parent_membar(barrier);
+ if (x->Opcode() != Op_MemBarCPUOrder)
+ return NULL;
+ } else {
+ // start from the supplied barrier
+ x = (Node *)barrier;
+ }
// the Mem feed to the membar should be a merge
! x = x->in(TypeFunc::Memory);
if (!x->is_MergeMem())
return NULL;
MergeMemNode *mm = x->as_MergeMem();
! if (is_cas) {
! // the merge should be fed from the CAS via an SCMemProj node
! x = NULL;
! for (uint idx = 1; idx < mm->req(); idx++) {
! if (mm->in(idx)->Opcode() == Op_SCMemProj) {
! x = mm->in(idx);
! break;
! }
! }
! if (x == NULL) {
! return NULL;
! }
! // check for a CAS feeding this proj
! x = x->in(0);
! int opcode = x->Opcode();
! if (!is_CAS(opcode)) {
! return NULL;
! }
! // the CAS should get its mem feed from the leading membar
! x = x->in(MemNode::Memory);
! } else {
! // the merge should get its Bottom mem feed from the leading membar
x = mm->in(Compile::AliasIdxBot);
+ }
+
// ensure this is a non control projection
! if (!x->is_Proj() || x->is_CFG()) {
return NULL;
+ }
// if it is fed by a membar that's the one we want
x = x->in(0);
! if (!x->is_MemBar()) {
return NULL;
+ }
MemBarNode *leading = x->as_MemBar();
// reject invalid candidates
! if (!leading_membar(leading)) {
return NULL;
+ }
! // ok, we have a leading membar, now for the sanity clauses
! // the leading membar must feed Mem to a releasing store or CAS
ProjNode *mem = leading->proj_out(TypeFunc::Memory);
StoreNode *st = NULL;
+ LoadStoreNode *cas = NULL;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
+ // two stores or CASes is one too many
+ if (st != NULL || cas != NULL) {
+ return NULL;
+ }
st = x->as_Store();
! } else if (is_CAS(x->Opcode())) {
! if (st != NULL || cas != NULL) {
! return NULL;
! }
! cas = x->as_LoadStore();
}
}
!
! // we must have found at least one of a store or a cas
! if (st == NULL && cas == NULL) {
return NULL;
+ }
! if (st == NULL) {
! // nothing more to check
! return leading;
! } else {
! // we should not have a store if we started from an acquire
! if (is_cas) {
! return NULL;
! }
!
! // the store should feed the merge we used to get here
for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
! if (st->fast_out(i) == mm) {
return leading;
}
+ }
+ }
return NULL;
}
// card_mark_to_trailing
*** 1863,1874 ****
// | |
// | . . .
// Bot | /
// MergeMem
// |
! // MemBarVolatile (trailing)
! //
//
// 2)
// MemBarRelease/CPUOrder (leading)
// |
// |
--- 2195,2206 ----
// | |
// | . . .
// Bot | /
// MergeMem
// |
! // |
! // MemBarVolatile {trailing}
//
// 2)
// MemBarRelease/CPUOrder (leading)
// |
// |
*** 1882,1892 ****
// \ /
// Phi . . .
// Bot | /
// MergeMem
// |
! // MemBarVolatile (trailing)
//
// 3)
// MemBarRelease/CPUOrder (leading)
// |
// |\
--- 2214,2225 ----
// \ /
// Phi . . .
// Bot | /
// MergeMem
// |
! // MemBarVolatile {trailing}
! //
//
// 3)
// MemBarRelease/CPUOrder (leading)
// |
// |\
*** 1903,1913 ****
// \ /
// Phi . . .
// Bot | /
// MergeMem
// |
! // MemBarVolatile (trailing)
//
// configuration 1 is only valid if UseConcMarkSweepGC &&
// UseCondCardMark
//
// configurations 2 and 3 are only valid if UseG1GC.
--- 2236,2247 ----
// \ /
// Phi . . .
// Bot | /
// MergeMem
// |
! // |
! // MemBarVolatile {trailing}
//
// configuration 1 is only valid if UseConcMarkSweepGC &&
// UseCondCardMark
//
// configurations 2 and 3 are only valid if UseG1GC.
*** 1953,1964 ****
if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
phi = x->as_Phi();
break;
}
}
! if (!phi)
return NULL;
// look for another merge below this phi
feed = phi;
} else {
// couldn't find a merge
return NULL;
--- 2287,2299 ----
if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
phi = x->as_Phi();
break;
}
}
! if (!phi) {
return NULL;
+ }
// look for another merge below this phi
feed = phi;
} else {
// couldn't find a merge
return NULL;
*** 1967,1977 ****
// sanity check this feed turns up as the expected slice
assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
MemBarNode *trailing = NULL;
! // be sure we have a volatile membar below the merge
for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
x = mm->fast_out(i);
if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
trailing = x->as_MemBar();
break;
--- 2302,2312 ----
// sanity check this feed turns up as the expected slice
assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
MemBarNode *trailing = NULL;
! // be sure we have a trailing membar below the merge
for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
x = mm->fast_out(i);
if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
trailing = x->as_MemBar();
break;
*** 1982,2009 ****
}
// trailing_to_card_mark
//
// graph traversal helper which detects extra, non-normal Mem feed
! // from a trailing membar to a preceding card mark volatile membar
! // i.e. it identifies whether one of the three possible extra GC
! // post-write Mem flow subgraphs is present
//
// this predicate checks for the same flow as the previous predicate
// but starting from the bottom rather than the top.
//
! // if the configurationis present returns the card mark membar
// otherwise NULL
MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
{
! assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
- Node *x = trailing->in(TypeFunc::Memory);
// the Mem feed to the membar should be a merge
! if (!x->is_MergeMem())
return NULL;
MergeMemNode *mm = x->as_MergeMem();
x = mm->in(Compile::AliasIdxBot);
// with G1 we may possibly see a Phi or two before we see a Memory
--- 2317,2352 ----
}
// trailing_to_card_mark
//
// graph traversal helper which detects extra, non-normal Mem feed
! // from a trailing volatile membar to a preceding card mark volatile
! // membar i.e. it identifies whether one of the three possible extra
! // GC post-write Mem flow subgraphs is present
//
// this predicate checks for the same flow as the previous predicate
// but starting from the bottom rather than the top.
//
! // if the configuration is present returns the card mark membar
// otherwise NULL
+ //
+ // n.b. the supplied membar is expected to be a trailing
+ // MemBarVolatile i.e. the caller must ensure the input node has the
+ // correct opcode
MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
{
! assert(trailing->Opcode() == Op_MemBarVolatile,
! "expecting a volatile membar");
! assert(!is_card_mark_membar(trailing),
! "not expecting a card mark membar");
// the Mem feed to the membar should be a merge
! Node *x = trailing->in(TypeFunc::Memory);
! if (!x->is_MergeMem()) {
return NULL;
+ }
MergeMemNode *mm = x->as_MergeMem();
x = mm->in(Compile::AliasIdxBot);
// with G1 we may possibly see a Phi or two before we see a Memory
*** 2052,2076 ****
return NULL;
}
}
// the proj has to come from the card mark membar
x = x->in(0);
! if (!x->is_MemBar())
return NULL;
MemBarNode *card_mark_membar = x->as_MemBar();
! if (!is_card_mark_membar(card_mark_membar))
return NULL;
return card_mark_membar;
}
// trailing_to_leading
//
// graph traversal helper which checks the Mem flow up the graph
! // from a (non-card mark) volatile membar attempting to locate and
// return an associated leading membar. it first looks for a
// subgraph in the normal configuration (relying on helper
// normal_to_leading). failing that it then looks for one of the
// possible post-write card mark subgraphs linking the trailing node
// to a the card mark membar (relying on helper
--- 2395,2421 ----
return NULL;
}
}
// the proj has to come from the card mark membar
x = x->in(0);
! if (!x->is_MemBar()) {
return NULL;
+ }
MemBarNode *card_mark_membar = x->as_MemBar();
! if (!is_card_mark_membar(card_mark_membar)) {
return NULL;
+ }
return card_mark_membar;
}
// trailing_to_leading
//
// graph traversal helper which checks the Mem flow up the graph
! // from a (non-card mark) trailing membar attempting to locate and
// return an associated leading membar. it first looks for a
// subgraph in the normal configuration (relying on helper
// normal_to_leading). failing that it then looks for one of the
// possible post-write card mark subgraphs linking the trailing node
// to the card mark membar (relying on helper
*** 2079,2116 ****
// predicate normal_to_leading).
//
// if the configuration is valid returns the cpuorder member for
// preference or when absent the release membar otherwise NULL.
//
! // n.b. the input membar is expected to be a volatile membar but
! // must *not* be a card mark membar.
MemBarNode *trailing_to_leading(const MemBarNode *trailing)
{
! assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
MemBarNode *leading = normal_to_leading(trailing);
! if (leading)
return leading;
MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
! if (!card_mark_membar)
return NULL;
return normal_to_leading(card_mark_membar);
}
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb
bool unnecessary_acquire(const Node *barrier)
{
! // assert barrier->is_MemBar();
! if (UseBarriersForVolatile)
// we need to plant a dmb
return false;
// a volatile read derived from bytecode (or also from an inlined
// SHA field read via LibraryCallKit::load_field_from_object)
// manifests as a LoadX[mo_acquire] followed by an acquire membar
// with a bogus read dependency on it's preceding load. so in those
--- 2424,2476 ----
// predicate normal_to_leading).
//
// if the configuration is valid returns the cpuorder member for
// preference or when absent the release membar otherwise NULL.
//
! // n.b. the input membar is expected to be either a volatile or
! // acquire membar but in the former case must *not* be a card mark
! // membar.
MemBarNode *trailing_to_leading(const MemBarNode *trailing)
{
! assert((trailing->Opcode() == Op_MemBarAcquire ||
! trailing->Opcode() == Op_MemBarVolatile),
! "expecting an acquire or volatile membar");
! assert((trailing->Opcode() != Op_MemBarVolatile ||
! !is_card_mark_membar(trailing)),
! "not expecting a card mark membar");
MemBarNode *leading = normal_to_leading(trailing);
! if (leading) {
return leading;
+ }
+
+ // nothing more to do if this is an acquire
+ if (trailing->Opcode() == Op_MemBarAcquire) {
+ return NULL;
+ }
MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
! if (!card_mark_membar) {
return NULL;
+ }
return normal_to_leading(card_mark_membar);
}
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb
bool unnecessary_acquire(const Node *barrier)
{
! assert(barrier->is_MemBar(), "expecting a membar");
!
! if (UseBarriersForVolatile) {
// we need to plant a dmb
return false;
+ }
// a volatile read derived from bytecode (or also from an inlined
// SHA field read via LibraryCallKit::load_field_from_object)
// manifests as a LoadX[mo_acquire] followed by an acquire membar
// with a bogus read dependency on it's preceding load. so in those
*** 2138,2149 ****
// |Parms
// MemBarAcquire*
//
// where * tags node we were passed
// and |k means input k
! if (x->is_DecodeNarrowPtr())
x = x->in(1);
return (x->is_Load() && x->as_Load()->is_acquire());
}
// now check for an unsafe volatile get
--- 2498,2510 ----
// |Parms
// MemBarAcquire*
//
// where * tags node we were passed
// and |k means input k
! if (x->is_DecodeNarrowPtr()) {
x = x->in(1);
+ }
return (x->is_Load() && x->as_Load()->is_acquire());
}
// now check for an unsafe volatile get
*** 2165,2176 ****
MemBarNode *parent = parent_membar(barrier);
if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
return false;
ctl = parent->proj_out(TypeFunc::Control);
mem = parent->proj_out(TypeFunc::Memory);
! if (!ctl || !mem)
return false;
// ensure the proj nodes both feed a LoadX[mo_acquire]
LoadNode *ld = NULL;
for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
x = ctl->fast_out(i);
// if we see a load we keep hold of it and stop searching
--- 2526,2538 ----
MemBarNode *parent = parent_membar(barrier);
if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
return false;
ctl = parent->proj_out(TypeFunc::Control);
mem = parent->proj_out(TypeFunc::Memory);
! if (!ctl || !mem) {
return false;
+ }
// ensure the proj nodes both feed a LoadX[mo_acquire]
LoadNode *ld = NULL;
for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
x = ctl->fast_out(i);
// if we see a load we keep hold of it and stop searching
*** 2178,2219 ****
ld = x->as_Load();
break;
}
}
// it must be an acquiring load
! if (! ld || ! ld->is_acquire())
! return false;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
// if we see the same load we drop it and stop searching
if (x == ld) {
ld = NULL;
break;
}
}
// we must have dropped the load
! if (ld)
! return false;
// check for a child cpuorder membar
MemBarNode *child = child_membar(barrier->as_MemBar());
! if (!child || child->Opcode() != Op_MemBarCPUOrder)
! return false;
!
return true;
}
bool needs_acquiring_load(const Node *n)
{
! // assert n->is_Load();
! if (UseBarriersForVolatile)
// we use a normal load and a dmb
return false;
LoadNode *ld = n->as_Load();
! if (!ld->is_acquire())
return false;
// check if this load is feeding an acquire membar
//
// LoadX[mo_acquire]
// { |1 }
--- 2540,2589 ----
ld = x->as_Load();
break;
}
}
// it must be an acquiring load
! if (ld && ld->is_acquire()) {
!
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
x = mem->fast_out(i);
// if we see the same load we drop it and stop searching
if (x == ld) {
ld = NULL;
break;
}
}
// we must have dropped the load
! if (ld == NULL) {
// check for a child cpuorder membar
MemBarNode *child = child_membar(barrier->as_MemBar());
! if (child && child->Opcode() == Op_MemBarCPUOrder)
return true;
+ }
+ }
+
+ // final option for unnecessary membar is that it is a trailing node
+ // belonging to a CAS
+
+ MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
+
+ return leading != NULL;
}
bool needs_acquiring_load(const Node *n)
{
! assert(n->is_Load(), "expecting a load");
! if (UseBarriersForVolatile) {
// we use a normal load and a dmb
return false;
+ }
LoadNode *ld = n->as_Load();
! if (!ld->is_acquire()) {
return false;
+ }
// check if this load is feeding an acquire membar
//
// LoadX[mo_acquire]
// { |1 }
*** 2259,2295 ****
MemBarNode *membar;
membar = parent_membar(ld);
! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
return false;
// ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
membar = child_membar(membar);
! if (!membar || !membar->Opcode() == Op_MemBarAcquire)
return false;
membar = child_membar(membar);
! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
return false;
return true;
}
bool unnecessary_release(const Node *n)
{
assert((n->is_MemBar() &&
n->Opcode() == Op_MemBarRelease),
"expecting a release membar");
! if (UseBarriersForVolatile)
// we need to plant a dmb
return false;
// if there is a dependent CPUOrder barrier then use that as the
// leading
MemBarNode *barrier = n->as_MemBar();
--- 2629,2669 ----
MemBarNode *membar;
membar = parent_membar(ld);
! if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
return false;
+ }
// ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
membar = child_membar(membar);
! if (!membar || membar->Opcode() != Op_MemBarAcquire) {
return false;
+ }
membar = child_membar(membar);
! if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
return false;
+ }
return true;
}
bool unnecessary_release(const Node *n)
{
assert((n->is_MemBar() &&
n->Opcode() == Op_MemBarRelease),
"expecting a release membar");
! if (UseBarriersForVolatile) {
// we need to plant a dmb
return false;
+ }
// if there is a dependent CPUOrder barrier then use that as the
// leading
MemBarNode *barrier = n->as_MemBar();
*** 2301,2336 ****
}
// must start with a normal feed
MemBarNode *child_barrier = leading_to_normal(barrier);
! if (!child_barrier)
return false;
! if (!is_card_mark_membar(child_barrier))
// this is the trailing membar and we are done
return true;
// must be sure this card mark feeds a trailing membar
MemBarNode *trailing = card_mark_to_trailing(child_barrier);
return (trailing != NULL);
}
bool unnecessary_volatile(const Node *n)
{
// assert n->is_MemBar();
! if (UseBarriersForVolatile)
// we need to plant a dmb
return false;
MemBarNode *mbvol = n->as_MemBar();
// first we check if this is part of a card mark. if so then we have
// to generate a StoreLoad barrier
! if (is_card_mark_membar(mbvol))
return false;
// ok, if it's not a card mark then we still need to check if it is
// a trailing membar of a volatile put hgraph.
return (trailing_to_leading(mbvol) != NULL);
--- 2675,2714 ----
}
// must start with a normal feed
MemBarNode *child_barrier = leading_to_normal(barrier);
! if (!child_barrier) {
return false;
+ }
! if (!is_card_mark_membar(child_barrier)) {
// this is the trailing membar and we are done
return true;
+ }
// must be sure this card mark feeds a trailing membar
MemBarNode *trailing = card_mark_to_trailing(child_barrier);
return (trailing != NULL);
}
bool unnecessary_volatile(const Node *n)
{
// assert n->is_MemBar();
! if (UseBarriersForVolatile) {
// we need to plant a dmb
return false;
+ }
MemBarNode *mbvol = n->as_MemBar();
// first we check if this is part of a card mark. if so then we have
// to generate a StoreLoad barrier
! if (is_card_mark_membar(mbvol)) {
return false;
+ }
// ok, if it's not a card mark then we still need to check if it is
// a trailing membar of a volatile put hgraph.
return (trailing_to_leading(mbvol) != NULL);
*** 2339,2397 ****
// predicates controlling emit of str<x>/stlr<x> and associated dmbs
bool needs_releasing_store(const Node *n)
{
// assert n->is_Store();
! if (UseBarriersForVolatile)
// we use a normal store and dmb combination
return false;
StoreNode *st = n->as_Store();
// the store must be marked as releasing
! if (!st->is_release())
return false;
// the store must be fed by a membar
Node *x = st->lookup(StoreNode::Memory);
! if (! x || !x->is_Proj())
return false;
ProjNode *proj = x->as_Proj();
x = proj->lookup(0);
! if (!x || !x->is_MemBar())
return false;
MemBarNode *barrier = x->as_MemBar();
// if the barrier is a release membar or a cpuorder mmebar fed by a
// release membar then we need to check whether that forms part of a
// volatile put graph.
// reject invalid candidates
! if (!leading_membar(barrier))
return false;
// does this lead a normal subgraph?
MemBarNode *mbvol = leading_to_normal(barrier);
! if (!mbvol)
return false;
// all done unless this is a card mark
! if (!is_card_mark_membar(mbvol))
return true;
// we found a card mark -- just make sure we have a trailing barrier
return (card_mark_to_trailing(mbvol) != NULL);
}
// predicate controlling translation of StoreCM
//
// returns true if a StoreStore must precede the card write otherwise
// false
--- 2717,2831 ----
// predicates controlling emit of str<x>/stlr<x> and associated dmbs
bool needs_releasing_store(const Node *n)
{
// assert n->is_Store();
! if (UseBarriersForVolatile) {
// we use a normal store and dmb combination
return false;
+ }
StoreNode *st = n->as_Store();
// the store must be marked as releasing
! if (!st->is_release()) {
return false;
+ }
// the store must be fed by a membar
Node *x = st->lookup(StoreNode::Memory);
! if (! x || !x->is_Proj()) {
return false;
+ }
ProjNode *proj = x->as_Proj();
x = proj->lookup(0);
! if (!x || !x->is_MemBar()) {
return false;
+ }
MemBarNode *barrier = x->as_MemBar();
// if the barrier is a release membar or a cpuorder membar fed by a
// release membar then we need to check whether that forms part of a
// volatile put graph.
// reject invalid candidates
! if (!leading_membar(barrier)) {
return false;
+ }
// does this lead a normal subgraph?
MemBarNode *mbvol = leading_to_normal(barrier);
! if (!mbvol) {
return false;
+ }
// all done unless this is a card mark
! if (!is_card_mark_membar(mbvol)) {
return true;
+ }
// we found a card mark -- just make sure we have a trailing barrier
return (card_mark_to_trailing(mbvol) != NULL);
}
+ // predicate controlling translation of CAS
+ //
+ // returns true if CAS needs to use an acquiring load otherwise false
+
+ bool needs_acquiring_load_exclusive(const Node *n)
+ {
+ assert(is_CAS(n->Opcode()), "expecting a compare and swap");
+ if (UseBarriersForVolatile) {
+ return false;
+ }
+
+ // CAS nodes only ought to turn up in inlined unsafe CAS operations
+ #ifdef ASSERT
+ LoadStoreNode *st = n->as_LoadStore();
+
+ // the store must be fed by a membar
+
+ Node *x = st->lookup(StoreNode::Memory);
+
+ assert (x && x->is_Proj(), "CAS not fed by memory proj!");
+
+ ProjNode *proj = x->as_Proj();
+
+ x = proj->lookup(0);
+
+ assert (x && x->is_MemBar(), "CAS not fed by membar!");
+
+ MemBarNode *barrier = x->as_MemBar();
+
+ // the barrier must be a cpuorder membar fed by a release membar
+
+ assert(barrier->Opcode() == Op_MemBarCPUOrder,
+ "CAS not fed by cpuorder membar!");
+
+ MemBarNode *b = parent_membar(barrier);
+ assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
+ "CAS not fed by cpuorder+release membar pair!");
+
+ // does this lead a normal subgraph?
+ MemBarNode *mbar = leading_to_normal(barrier);
+
+ assert(mbar != NULL, "CAS not embedded in normal graph!");
+
+ assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
+ #endif // ASSERT
+ // so we can just return true here
+ return true;
+ }
+
// predicate controlling translation of StoreCM
//
// returns true if a StoreStore must precede the card write otherwise
// false
*** 2401,2418 ****
// we only ever need to generate a dmb ishst between an object put
// and the associated card mark when we are using CMS without
// conditional card marking
! if (!UseConcMarkSweepGC || UseCondCardMark)
return true;
// if we are implementing volatile puts using barriers then the
// object put as an str so we must insert the dmb ishst
! if (UseBarriersForVolatile)
return false;
// we can omit the dmb ishst if this StoreCM is part of a volatile
// put because in thta case the put will be implemented by stlr
//
// we need to check for a normal subgraph feeding this StoreCM.
--- 2835,2854 ----
// we only ever need to generate a dmb ishst between an object put
// and the associated card mark when we are using CMS without
// conditional card marking
! if (!UseConcMarkSweepGC || UseCondCardMark) {
return true;
+ }
// if we are implementing volatile puts using barriers then the
// object put as an str so we must insert the dmb ishst
! if (UseBarriersForVolatile) {
return false;
+ }
// we can omit the dmb ishst if this StoreCM is part of a volatile
// put because in thta case the put will be implemented by stlr
//
// we need to check for a normal subgraph feeding this StoreCM.
*** 2420,2442 ****
// either a MemBarRelease or its dependent MemBarCPUOrder, and the
// leading membar must be part of a normal subgraph
Node *x = storecm->in(StoreNode::Memory);
! if (!x->is_Proj())
return false;
x = x->in(0);
! if (!x->is_MemBar())
return false;
MemBarNode *leading = x->as_MemBar();
// reject invalid candidates
! if (!leading_membar(leading))
return false;
// we can omit the StoreStore if it is the head of a normal subgraph
return (leading_to_normal(leading) != NULL);
}
--- 2856,2881 ----
// either a MemBarRelease or its dependent MemBarCPUOrder, and the
// leading membar must be part of a normal subgraph
Node *x = storecm->in(StoreNode::Memory);
! if (!x->is_Proj()) {
return false;
+ }
x = x->in(0);
! if (!x->is_MemBar()) {
return false;
+ }
MemBarNode *leading = x->as_MemBar();
// reject invalid candidates
! if (!leading_membar(leading)) {
return false;
+ }
// we can omit the StoreStore if it is the head of a normal subgraph
return (leading_to_normal(leading) != NULL);
}
*** 8363,8375 ****
--- 8802,8818 ----
%}
// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them
+ // standard CompareAndSwapX when we are using barriers
+ // these have higher priority than the rules selected by a predicate
+
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+ ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
*** 8383,8392 ****
--- 8826,8836 ----
%}
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+ ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
*** 8400,8409 ****
--- 8844,8854 ----
%}
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
*** 8417,8426 ****
--- 8862,8872 ----
%}
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
*** 8431,8440 ****
--- 8877,8964 ----
aarch64_enc_cset_eq(res));
ins_pipe(pipe_slow);
%}
+ // alternative CompareAndSwapX when we are eliding barriers
+
+ instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+
+ predicate(needs_acquiring_load_exclusive(n));
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+ ins_cost(VOLATILE_REF_COST);
+
+ effect(KILL cr);
+
+ format %{
+ "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
+ "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
+ aarch64_enc_cset_eq(res));
+
+ ins_pipe(pipe_slow);
+ %}
+
+ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+
+ predicate(needs_acquiring_load_exclusive(n));
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+ ins_cost(VOLATILE_REF_COST);
+
+ effect(KILL cr);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
+ "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
+ aarch64_enc_cset_eq(res));
+
+ ins_pipe(pipe_slow);
+ %}
+
+ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+
+ predicate(needs_acquiring_load_exclusive(n));
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(VOLATILE_REF_COST);
+
+ effect(KILL cr);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
+ aarch64_enc_cset_eq(res));
+
+ ins_pipe(pipe_slow);
+ %}
+
+ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+
+ predicate(needs_acquiring_load_exclusive(n));
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(VOLATILE_REF_COST);
+
+ effect(KILL cr);
+
+ format %{
+ "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
+ "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
+ aarch64_enc_cset_eq(res));
+
+ ins_pipe(pipe_slow);
+ %}
+
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
match(Set prev (GetAndSetI mem newv));
format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
ins_encode %{
< prev index next >