# HG changeset patch # User adinn # Date 1429024319 14400 # Tue Apr 14 11:11:59 2015 -0400 # Node ID cbaad0338e067911872d091e3b9989bc368b93f7 # Parent d2764ea89544563a5ed1d6a92e23267cf74328e4 fix volatile reads and writes on AArch64 diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad --- a/src/cpu/aarch64/vm/aarch64.ad +++ b/src/cpu/aarch64/vm/aarch64.ad @@ -793,38 +793,832 @@ } }; - bool preceded_by_ordered_load(const Node *barrier); + // graph traversal helpers + MemBarNode *has_parent_membar(const Node *n, + ProjNode *&ctl, ProjNode *&mem); + MemBarNode *has_child_membar(const MemBarNode *n, + ProjNode *&ctl, ProjNode *&mem); + + // predicates controlling emit of ldr/ldar and associated dmb + bool unnecessary_acquire(const Node *barrier); + bool needs_acquiring_load(const Node *load); + + // predicates controlling emit of str/stlr and associated dmbs + bool unnecessary_release(const Node *barrier); + bool unnecessary_volatile(const Node *barrier); + bool needs_releasing_store(const Node *store); // Use barrier instructions rather than load acquire / store // release. - const bool UseBarriersForVolatile = true; + const bool UseBarriersForVolatile = false; + // Use barrier instructions for unsafe volatile gets rather than + // trying to identify an exact signature for them + const bool UseBarriersForUnsafeVolatileGet = false; %} source %{ - // AArch64 has load acquire and store release instructions which we - // use for ordered memory accesses, e.g. for volatiles. The ideal - // graph generator also inserts memory barriers around volatile - // accesses, and we don't want to generate both barriers and acq/rel - // instructions. So, when we emit a MemBarAcquire we look back in - // the ideal graph for an ordered load and only emit the barrier if - // we don't find one. - -bool preceded_by_ordered_load(const Node *barrier) { + // AArch64 has ldar and stlr instructions which we can safely + // use to implement volatile reads and writes. For a volatile read + // we simply need + // + // ldar + // + // and for a volatile write we need + // + // stlr + // + // Alternatively, we can implement them by pairing a normal + // load/store with a memory barrier. For a volatile read we need + // + // ldr + // dmb ishld + // + // for a volatile write + // + // dmb ish + // str + // dmb ish + // + // In order to generate the desired instruction sequence we need to + // be able to identify specific 'signature' ideal graph node + // sequences which i) occur as a translation of a volatile reads or + // writes and ii) do not occur through any other translation or + // graph transformation. We can then provide alternative aldc + // matching rules which translate these node sequences to the + // desired machine code sequences. Selection of the alternative + // rules can be implemented by predicates which identify the + // relevant node sequences. + // + // The ideal graph generator translates a volatile read to the node + // sequence + // + // LoadX[mo_acquire] + // MemBarAcquire + // + // As a special case when using the compressed oops optimization we + // may also see this variant + // + // LoadN[mo_acquire] + // DecodeN + // MemBarAcquire + // + // A volatile write is translated to the node sequence + // + // MemBarRelease + // StoreX[mo_release] + // MemBarVolatile + // + // n.b. the above node patterns are generated with a strict + // 'signature' configuration of input and output dependencies (see + // the predicates below for exact details). 
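As a point of comparison only (C++11 atomics rather than HotSpot code, with illustrative names), the same two lowering strategies look like this on AArch64, where mainstream compilers typically emit the instruction shapes listed above:

    // analogy only -- C++11 atomics, not HotSpot code
    #include <atomic>

    std::atomic<int> field{0};

    int read_like_ldar() {
      // acquire load: typically a single ldar
      return field.load(std::memory_order_acquire);
    }

    int read_like_ldr_plus_dmb() {
      // plain load followed by an acquire fence: ldr ; dmb ishld
      int v = field.load(std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_acquire);
      return v;
    }

    void write_like_stlr(int v) {
      // release store: typically a single stlr
      field.store(v, std::memory_order_release);
    }

    void write_like_dmb_str_dmb(int v) {
      // release fence, plain store, full fence: dmb ish ; str ; dmb ish
      std::atomic_thread_fence(std::memory_order_release);
      field.store(v, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

The predicates declared above simply decide, per volatile access, which of these two shapes the matcher is allowed to emit.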
The two signatures are + // unique to translated volatile reads/stores -- they will not + // appear as a result of any other bytecode translation or inlining + // nor as a consequence of optimizing transforms. + // + // We also want to catch inlined unsafe volatile gets and puts and + // be able to implement them using either ldar/stlr or some + // combination of ldr/stlr and dmb instructions. + // + // Inlined unsafe volatiles puts manifest as a minor variant of the + // normal volatile put node sequence containing an extra cpuorder + // membar + // + // MemBarRelease + // MemBarCPUOrder + // StoreX[mo_release] + // MemBarVolatile + // + // n.b. as an aside, the cpuorder membar is not itself subject to + // matching and translation by adlc rules. However, the rule + // predicates need to detect its presence in order to correctly + // select the desired adlc rules. + // + // Inlined unsafe volatiles gets manifest as a somewhat different + // node sequence to a normal volatile get + // + // MemBarCPUOrder + // || \\ + // MemBarAcquire LoadX[mo_acquire] + // || + // MemBarCPUOrder + // + // In this case the acquire membar does not directly depend on the + // load. However, we can be sure that the load is generated from an + // inlined unsafe volatile get if we see it dependent on this unique + // sequence of membar nodes. Similarly, given an acquire membar we + // can know that it was added because of an inlined unsafe volatile + // get if it is fed and feeds a cpuorder membar and if its feed + // membar also feeds an acquiring load. + // + // So, where we can identify these volatile read and write + // signatures we can choose to plant either of the above two code + // sequences. For a volatile read we can simply plant a normal + // ldr and translate the MemBarAcquire to a dmb. However, we can + // also choose to inhibit translation of the MemBarAcquire and + // inhibit planting of the ldr, instead planting an ldar. + // + // When we recognise a volatile store signature we can choose to + // plant at a dmb ish as a translation for the MemBarRelease, a + // normal str and then a dmb ish for the MemBarVolatile. + // Alternatively, we can inhibit translation of the MemBarRelease + // and MemBarVolatile and instead plant a simple stlr + // instruction. + // + // Of course, the above only applies when we see these signature + // configurations. We still want to plant dmb instructions in any + // other cases where we may see a MemBarAcquire, MemBarRelease or + // MemBarVolatile. For example, at the end of a constructor which + // writes final/volatile fields we will see a MemBarRelease + // instruction and this needs a 'dmb ish' lest we risk the + // constructed object being visible without making the + // final/volatile field writes visible. + // + // n.b. the translation rules below which rely on detection of the + // volatile signatures and insert ldar or stlr are failsafe. + // If we see anything other than the signature configurations we + // always just translate the loads and stors to ldr and str + // and translate acquire, release and volatile membars to the + // relevant dmb instructions. + // + // n.b.b as a case in point for the above comment, the current + // predicates don't detect the precise signature for certain types + // of volatile object stores (where the heap_base input type is not + // known at compile-time to be non-NULL). 
In those cases the + // MemBarRelease and MemBarVolatile bracket an if-then-else sequence + // with a store in each branch (we need a different store depending + // on whether heap_base is actually NULL). In such a case we will + // just plant a dmb both before and after the branch/merge. The + // predicate could (and probably should) be fixed later to also + // detect this case. + + // graph traversal helpers + + // if node n is linked to a parent MemBarNode by an intervening + // Control or Memory ProjNode return the MemBarNode otherwise return + // NULL. + // + // n may only be a Load or a MemBar. + // + // The ProjNode* references c and m are used to return the relevant + // nodes. + + MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m) + { + Node *ctl = NULL; + Node *mem = NULL; + Node *membar = NULL; + + if (n->is_Load()) { + ctl = n->lookup(LoadNode::Control); + mem = n->lookup(LoadNode::Memory); + } else if (n->is_MemBar()) { + ctl = n->lookup(TypeFunc::Control); + mem = n->lookup(TypeFunc::Memory); + } else { + return NULL; + } + + if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) + return NULL; + + c = ctl->as_Proj(); + + membar = ctl->lookup(0); + + if (!membar || !membar->is_MemBar()) + return NULL; + + m = mem->as_Proj(); + + if (mem->lookup(0) != membar) + return NULL; + + return membar->as_MemBar(); + } + + // if n is linked to a child MemBarNode by intervening Control and + // Memory ProjNodes return the MemBarNode otherwise return NULL. + // + // The ProjNode** arguments c and m are used to return pointers to + // the relevant nodes. A null argument means don't don't return a + // value. + + MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m) + { + ProjNode *ctl = n->proj_out(TypeFunc::Control); + ProjNode *mem = n->proj_out(TypeFunc::Memory); + + // MemBar needs to have both a Ctl and Mem projection + if (! ctl || ! mem) + return NULL; + + c = ctl; + m = mem; + + MemBarNode *child = NULL; + Node *x; + + for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { + x = ctl->fast_out(i); + // if we see a membar we keep hold of it. we may also see a new + // arena copy of the original but it will appear later + if (x->is_MemBar()) { + child = x->as_MemBar(); + break; + } + } + + if (child == NULL) + return NULL; + + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + // if we see a membar we keep hold of it. we may also see a new + // arena copy of the original but it will appear later + if (x == child) { + return child; + } + } + return NULL; + } + + // predicates controlling emit of ldr/ldar and associated dmb + +bool unnecessary_acquire(const Node *barrier) { + // assert barrier->is_MemBar(); + if (UseBarriersForVolatile) + // we need to plant a dmb + return false; + + // a volatile read derived from bytecode (or also from an inlined + // SHA field read via LibraryCallKit::load_field_from_object) + // manifests as a LoadX[mo_acquire] followed by an acquire membar + // with a bogus read dependency on it's preceding load. so in those + // cases we will find the load node at the PARMS offset of the + // acquire membar. n.b. there may be an intervening DecodeN node. + // + // a volatile load derived from an inlined unsafe field access + // manifests as a cpuorder membar with Ctl and Mem projections + // feeding both an acquire membar and a LoadX[mo_acquire]. The + // acquire then feeds another cpuorder membar via Ctl and Mem + // projections. 
The load has no output dependency on these trailing + // membars because subsequent nodes inserted into the graph take + // their control feed from the final membar cpuorder meaning they + // are all ordered after the load. + Node *x = barrier->lookup(TypeFunc::Parms); - - if (! x) + if (x) { + // we are starting from an acquire and it has a fake dependency + // + // need to check for + // + // LoadX[mo_acquire] + // { |1 } + // {DecodeN} + // |Parms + // MemBarAcquire* + // + // where * tags node we were passed + // and |k means input k + if (x->is_DecodeNarrowPtr()) + x = x->in(1); + + return (x->is_Load() && x->as_Load()->is_acquire()); + } + + // only continue if we want to try to match unsafe volatile gets + if (UseBarriersForUnsafeVolatileGet) return false; - if (x->is_DecodeNarrowPtr()) - x = x->in(1); - - if (x->is_Load()) - return ! x->as_Load()->is_unordered(); - - return false; + // need to check for + // + // MemBarCPUOrder + // || \\ + // MemBarAcquire* LoadX[mo_acquire] + // || + // MemBarCPUOrder + // + // where * tags node we were passed + // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes + + // check for a parent MemBarCPUOrder + ProjNode *ctl; + ProjNode *mem; + MemBarNode *parent = has_parent_membar(barrier, ctl, mem); + if (!parent || parent->Opcode() != Op_MemBarCPUOrder) + return false; + // ensure the proj nodes both feed a LoadX[mo_acquire] + LoadNode *ld = NULL; + for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { + x = ctl->fast_out(i); + // if we see a load we keep hold of it and stop searching + if (x->is_Load()) { + ld = x->as_Load(); + break; + } + } + // it must be an acquiring load + if (! ld || ! ld->is_acquire()) + return false; + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + // if we see the same load we drop it and stop searching + if (x == ld) { + ld = NULL; + break; + } + } + // we must have dropped the load + if (ld) + return false; + // check for a child cpuorder membar + MemBarNode *child = has_child_membar(barrier->as_MemBar(), ctl, mem); + if (!child || child->Opcode() != Op_MemBarCPUOrder) + return false; + + return true; } +bool needs_acquiring_load(const Node *n) +{ + // assert n->is_Load(); + if (UseBarriersForVolatile) + // we use a normal load and a dmb + return false; + + LoadNode *ld = n->as_Load(); + + if (!ld->is_acquire()) + return false; + + // check if this load is feeding an acquire membar + // + // LoadX[mo_acquire] + // { |1 } + // {DecodeN} + // |Parms + // MemBarAcquire* + // + // where * tags node we were passed + // and |k means input k + + Node *start = ld; + Node *mbacq = NULL; + + // if we hit a DecodeNarrowPtr we reset the start node and restart + // the search through the outputs + restart: + + for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) { + Node *x = start->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) { + mbacq = x; + } else if (!mbacq && + (x->is_DecodeNarrowPtr() || + (x->is_Mach() && x->Opcode() == Op_DecodeN))) { + start = x; + goto restart; + } + } + + if (mbacq) { + return true; + } + + // only continue if we want to try to match unsafe volatile gets + if (UseBarriersForUnsafeVolatileGet) + return false; + + // check if Ctl and Proj feed comes from a MemBarCPUOrder + // + // MemBarCPUOrder + // || \\ + // MemBarAcquire* LoadX[mo_acquire] + // || + // MemBarCPUOrder + + MemBarNode *membar; + ProjNode *ctl; + ProjNode *mem; + + membar = has_parent_membar(ld, ctl, mem); + + if (!membar || 
membar->Opcode() != Op_MemBarCPUOrder) + return false; + + // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain + + membar = has_child_membar(membar, ctl, mem); + + if (!membar || membar->Opcode() != Op_MemBarAcquire) + return false; + + membar = has_child_membar(membar, ctl, mem); + + if (!membar || membar->Opcode() != Op_MemBarCPUOrder) + return false; + + return true; +} + +bool unnecessary_release(const Node *n) { + // assert n->is_MemBar(); + if (UseBarriersForVolatile) + // we need to plant a dmb + return false; + + // ok, so we can omit this release barrier if it has been inserted + // as part of a volatile store sequence + // + // MemBarRelease + // { || } + // {MemBarCPUOrder} -- optional + // || \\ + // || StoreX[mo_release] + // | \ / + // | MergeMem + // | / + // MemBarVolatile + // + // where + // || and \\ represent Ctl and Mem feeds via Proj nodes + // | \ and / indicate further routing of the Ctl and Mem feeds + // + // so we need to check that + // + // i) the release membar (or its dependent cpuorder membar) feeds + // control to a store node (via a Control project node) + // + // ii) the store is ordered release + // + // iii) the release membar (or its dependent cpuorder membar) feeds + // control to a volatile membar (via the same Control project node) + // + // iv) the release membar feeds memory to a merge mem and to the + // same store (both via a single Memory proj node) + // + // v) the store outputs to the merge mem + // + // vi) the merge mem outputs to the same volatile membar + // + // n.b. if this is an inlined unsafe node then the release membar + // may feed its control and memory links via an intervening cpuorder + // membar. this case can be dealt with when we check the release + // membar projections. if they both feed a single cpuorder membar + // node continue to make the same checks as above but with the + // cpuorder membar substituted for the release membar. if they don't + // both feed a cpuorder membar then the check fails. + // + // n.b.b. for an inlined unsafe store of an object in the case where + // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see + // an embedded if then else where we expect the store. this is + // needed to do the right type of store depending on whether + // heap_base is NULL. We could check for that but for now we can + // just take the hit of inserting a redundant dmb for this + // redundant volatile membar + + MemBarNode *barrier = n->as_MemBar(); + ProjNode *ctl; + ProjNode *mem; + // check for an intervening cpuorder membar + MemBarNode *b = has_child_membar(barrier, ctl, mem); + if (b && b->Opcode() == Op_MemBarCPUOrder) { + // ok, so start from the dependent cpuorder barrier + barrier = b; + } + // check the ctl and mem flow + ctl = barrier->proj_out(TypeFunc::Control); + mem = barrier->proj_out(TypeFunc::Memory); + + // the barrier needs to have both a Ctl and Mem projection + if (! ctl || ! mem) + return false; + + Node *x = NULL; + Node *mbvol = NULL; + StoreNode * st = NULL; + + // For a normal volatile write the Ctl ProjNode should have output + // to a MemBarVolatile and a Store marked as releasing + // + // n.b. for an inlined unsafe store of an object in the case where + // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see + // an embedded if then else where we expect the store. this is + // needed to do the right type of store depending on whether + // heap_base is NULL. 
We could check for that case too but for now + // we can just take the hit of inserting a dmb and a non-volatile + // store to implement the volatile store + + for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { + x = ctl->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { + if (mbvol) { + return false; + } + mbvol = x; + } else if (x->is_Store()) { + st = x->as_Store(); + if (! st->is_release()) { + return false; + } + } else if (!x->is_Mach()) { + // we may see mach nodes added during matching but nothing else + return false; + } + } + + if (!mbvol || !st) + return false; + + // the Mem ProjNode should output to a MergeMem and the same Store + Node *mm = NULL; + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (!mm && x->is_MergeMem()) { + mm = x; + } else if (x != st && !x->is_Mach()) { + // we may see mach nodes added during matching but nothing else + return false; + } + } + + if (!mm) + return false; + + // the MergeMem should output to the MemBarVolatile + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x != mbvol && !x->is_Mach()) { + // we may see mach nodes added during matching but nothing else + return false; + } + } + + return true; +} + +bool unnecessary_volatile(const Node *n) { + // assert n->is_MemBar(); + if (UseBarriersForVolatile) + // we need to plant a dmb + return false; + + // ok, so we can omit this volatile barrier if it has been inserted + // as part of a volatile store sequence + // + // MemBarRelease + // { || } + // {MemBarCPUOrder} -- optional + // || \\ + // || StoreX[mo_release] + // | \ / + // | MergeMem + // | / + // MemBarVolatile + // + // where + // || and \\ represent Ctl and Mem feeds via Proj nodes + // | \ and / indicate further routing of the Ctl and Mem feeds + // + // we need to check that + // + // i) the volatile membar gets its control feed from a release + // membar (or its dependent cpuorder membar) via a Control project + // node + // + // ii) the release membar (or its dependent cpuorder membar) also + // feeds control to a store node via the same proj node + // + // iii) the store is ordered release + // + // iv) the release membar (or its dependent cpuorder membar) feeds + // memory to a merge mem and to the same store (both via a single + // Memory proj node) + // + // v) the store outputs to the merge mem + // + // vi) the merge mem outputs to the volatile membar + // + // n.b. for an inlined unsafe store of an object in the case where + // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see + // an embedded if then else where we expect the store. this is + // needed to do the right type of store depending on whether + // heap_base is NULL. We could check for that but for now we can + // just take the hit of on inserting a redundant dmb for this + // redundant volatile membar + + MemBarNode *mbvol = n->as_MemBar(); + Node *x = n->lookup(TypeFunc::Control); + + if (! x || !x->is_Proj()) + return false; + + ProjNode *proj = x->as_Proj(); + + x = proj->lookup(0); + + if (!x || !x->is_MemBar()) + return false; + + MemBarNode *barrier = x->as_MemBar(); + + // if the barrier is a release membar we have what we want. 
if it is + // a cpuorder membar then we need to ensure that it is fed by a + // release membar in which case we proceed to check the graph below + // this cpuorder membar as the feed + + if (x->Opcode() != Op_MemBarRelease) { + if (x->Opcode() != Op_MemBarCPUOrder) + return false; + ProjNode *ctl; + ProjNode *mem; + MemBarNode *b = has_parent_membar(x, ctl, mem); + if (!b || b->Opcode() != Op_MemBarRelease) + return false; + } + + ProjNode *ctl = barrier->proj_out(TypeFunc::Control); + ProjNode *mem = barrier->proj_out(TypeFunc::Memory); + + // barrier needs to have both a Ctl and Mem projection + // and we need to have reached it via the Ctl projection + if (! ctl || ! mem || ctl != proj) + return false; + + StoreNode * st = NULL; + + // The Ctl ProjNode should have output to a MemBarVolatile and + // a Store marked as releasing + for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { + x = ctl->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { + if (x != mbvol) { + return false; + } + } else if (x->is_Store()) { + st = x->as_Store(); + if (! st->is_release()) { + return false; + } + } else if (!x->is_Mach()){ + // we may see mach nodes added during matching but nothing else + return false; + } + } + + if (!st) + return false; + + // the Mem ProjNode should output to a MergeMem and the same Store + Node *mm = NULL; + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (!mm && x->is_MergeMem()) { + mm = x; + } else if (x != st && !x->is_Mach()) { + // we may see mach nodes added during matching but nothing else + return false; + } + } + + if (!mm) + return false; + + // the MergeMem should output to the MemBarVolatile + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x != mbvol && !x->is_Mach()) { + // we may see mach nodes added during matching but nothing else + return false; + } + } + + return true; +} + + + +bool needs_releasing_store(const Node *n) +{ + // assert n->is_Store(); + if (UseBarriersForVolatile) + // we use a normal store and dmb combination + return false; + + StoreNode *st = n->as_Store(); + + if (!st->is_release()) + return false; + + // check if this store is bracketed by a release (or its dependent + // cpuorder membar) and a volatile membar + // + // MemBarRelease + // { || } + // {MemBarCPUOrder} -- optional + // || \\ + // || StoreX[mo_release] + // | \ / + // | MergeMem + // | / + // MemBarVolatile + // + // where + // || and \\ represent Ctl and Mem feeds via Proj nodes + // | \ and / indicate further routing of the Ctl and Mem feeds + // + + + Node *x = st->lookup(TypeFunc::Control); + + if (! x || !x->is_Proj()) + return false; + + ProjNode *proj = x->as_Proj(); + + x = proj->lookup(0); + + if (!x || !x->is_MemBar()) + return false; + + MemBarNode *barrier = x->as_MemBar(); + + // if the barrier is a release membar we have what we want. 
if it is + // a cpuorder membar then we need to ensure that it is fed by a + // release membar in which case we proceed to check the graph below + // this cpuorder membar as the feed + + if (x->Opcode() != Op_MemBarRelease) { + if (x->Opcode() != Op_MemBarCPUOrder) + return false; + Node *ctl = x->lookup(TypeFunc::Control); + Node *mem = x->lookup(TypeFunc::Memory); + if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj()) + return false; + x = ctl->lookup(0); + if (!x || !x->is_MemBar() || x->Opcode() != Op_MemBarRelease) + return false; + Node *y = mem->lookup(0); + if (!y || y != x) + return false; + } + + ProjNode *ctl = barrier->proj_out(TypeFunc::Control); + ProjNode *mem = barrier->proj_out(TypeFunc::Memory); + + // MemBarRelease needs to have both a Ctl and Mem projection + // and we need to have reached it via the Ctl projection + if (! ctl || ! mem || ctl != proj) + return false; + + MemBarNode *mbvol = NULL; + + // The Ctl ProjNode should have output to a MemBarVolatile and + // a Store marked as releasing + for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { + x = ctl->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { + mbvol = x->as_MemBar(); + } else if (x->is_Store()) { + if (x != st) { + return false; + } + } else if (!x->is_Mach()){ + return false; + } + } + + if (!mbvol) + return false; + + // the Mem ProjNode should output to a MergeMem and the same Store + Node *mm = NULL; + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (!mm && x->is_MergeMem()) { + mm = x; + } else if (x != st && !x->is_Mach()) { + return false; + } + } + + if (!mm) + return false; + + // the MergeMem should output to the MemBarVolatile + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x != mbvol && !x->is_Mach()) { + return false; + } + } + + return true; +} + + + #define __ _masm. 
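All of the predicates above reduce to recognising one fixed shape in the ideal graph from different entry points. The sketch below is a deliberately simplified mock -- MockNode and is_volatile_store_signature are invented for illustration and are not HotSpot's Node/MemBarNode/ProjNode API, and the real checks also thread through the Ctl/Mem ProjNodes and the MergeMem -- but it shows the kind of walk involved for the volatile-store signature:

    // simplified mock, for illustration only -- not HotSpot's node classes
    #include <string>
    #include <vector>

    struct MockNode {
      std::string op;                  // "MemBarRelease", "StoreX", "MemBarVolatile", ...
      bool is_release = false;         // mo_release flag on a store
      std::vector<MockNode*> outputs;  // users of this node's control/memory outputs
    };

    // does 'release' head the shape
    //   MemBarRelease -> StoreX[mo_release] -> MemBarVolatile
    // with the store feeding the same trailing membar as the release membar?
    static bool is_volatile_store_signature(const MockNode* release) {
      if (release->op != "MemBarRelease") return false;
      const MockNode* store = nullptr;
      const MockNode* trailing = nullptr;
      for (const MockNode* u : release->outputs) {
        if (u->op == "StoreX" && u->is_release) store = u;
        else if (u->op == "MemBarVolatile")     trailing = u;
      }
      if (store == nullptr || trailing == nullptr) return false;
      for (const MockNode* u : store->outputs) {
        if (u == trailing) return true;  // the store reaches the same trailing membar
      }
      return false;
    }

unnecessary_release, unnecessary_volatile and needs_releasing_store are three views of this one shape, entered from the leading membar, the trailing membar and the store respectively; they must recognise exactly the same graphs so that the stlr and the two elided dmbs are always selected together.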
// advance declarations for helper functions to convert register @@ -5151,7 +5945,7 @@ instruct loadB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadB mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrsbw $dst, $mem\t# byte" %} @@ -5165,7 +5959,7 @@ instruct loadB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadB mem))); - predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsb $dst, $mem\t# byte" %} @@ -5179,7 +5973,7 @@ instruct loadUB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUB mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrbw $dst, $mem\t# byte" %} @@ -5193,7 +5987,7 @@ instruct loadUB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUB mem))); - predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrb $dst, $mem\t# byte" %} @@ -5207,7 +6001,7 @@ instruct loadS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadS mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrshw $dst, $mem\t# short" %} @@ -5221,7 +6015,7 @@ instruct loadS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadS mem))); - predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsh $dst, $mem\t# short" %} @@ -5235,7 +6029,7 @@ instruct loadUS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUS mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} @@ -5249,7 +6043,7 @@ instruct loadUS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUS mem))); - predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} @@ -5263,7 +6057,7 @@ instruct loadI(iRegINoSp dst, memory mem) %{ match(Set dst (LoadI mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} @@ -5277,7 +6071,7 @@ instruct loadI2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadI mem))); - predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsw $dst, $mem\t# int" %} @@ -5291,7 +6085,7 @@ instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); - predicate(UseBarriersForVolatile || n->in(1)->in(1)->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load())); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} @@ -5305,7 +6099,7 @@ instruct loadL(iRegLNoSp dst, memory mem) %{ match(Set dst (LoadL mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# int" %} @@ -5332,7 +6126,7 @@ instruct loadP(iRegPNoSp dst, memory 
mem) %{ match(Set dst (LoadP mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} @@ -5346,7 +6140,7 @@ instruct loadN(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadN mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} @@ -5360,7 +6154,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadKlass mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# class" %} @@ -5374,7 +6168,7 @@ instruct loadNKlass(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadNKlass mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed class ptr" %} @@ -5388,7 +6182,7 @@ instruct loadF(vRegF dst, memory mem) %{ match(Set dst (LoadF mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrs $dst, $mem\t# float" %} @@ -5402,7 +6196,7 @@ instruct loadD(vRegD dst, memory mem) %{ match(Set dst (LoadD mem)); - predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); + predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrd $dst, $mem\t# double" %} @@ -5633,7 +6427,7 @@ instruct storeB(iRegIorL2I src, memory mem) %{ match(Set mem (StoreB mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strb $src, $mem\t# byte" %} @@ -5647,7 +6441,7 @@ instruct storeimmB0(immI0 zero, memory mem) %{ match(Set mem (StoreB mem zero)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strb zr, $mem\t# byte" %} @@ -5661,7 +6455,7 @@ instruct storeC(iRegIorL2I src, memory mem) %{ match(Set mem (StoreC mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strh $src, $mem\t# short" %} @@ -5674,7 +6468,7 @@ instruct storeimmC0(immI0 zero, memory mem) %{ match(Set mem (StoreC mem zero)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strh zr, $mem\t# short" %} @@ -5689,7 +6483,7 @@ instruct storeI(iRegIorL2I src, memory mem) %{ match(Set mem(StoreI mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# int" %} @@ -5702,7 +6496,7 @@ instruct storeimmI0(immI0 zero, memory mem) %{ match(Set mem(StoreI mem zero)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# int" %} @@ -5716,7 +6510,7 @@ instruct storeL(iRegL src, memory mem) %{ match(Set mem (StoreL mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str $src, $mem\t# int" %} @@ -5730,7 +6524,7 @@ instruct storeimmL0(immL0 zero, memory mem) %{ match(Set mem 
(StoreL mem zero)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str zr, $mem\t# int" %} @@ -5744,7 +6538,7 @@ instruct storeP(iRegP src, memory mem) %{ match(Set mem (StoreP mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str $src, $mem\t# ptr" %} @@ -5758,7 +6552,7 @@ instruct storeimmP0(immP0 zero, memory mem) %{ match(Set mem (StoreP mem zero)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str zr, $mem\t# ptr" %} @@ -5772,7 +6566,7 @@ instruct storeN(iRegN src, memory mem) %{ match(Set mem (StoreN mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} @@ -5787,7 +6581,7 @@ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL && - (UseBarriersForVolatile || n->as_Store()->is_unordered())); + (!needs_releasing_store(n))); ins_cost(INSN_COST); format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %} @@ -5801,7 +6595,7 @@ instruct storeF(vRegF src, memory mem) %{ match(Set mem (StoreF mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strs $src, $mem\t# float" %} @@ -5818,7 +6612,7 @@ instruct storeD(vRegD src, memory mem) %{ match(Set mem (StoreD mem src)); - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strd $src, $mem\t# double" %} @@ -5831,7 +6625,7 @@ // Store Compressed Klass Pointer instruct storeNKlass(iRegN src, memory mem) %{ - predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); + predicate(!needs_releasing_store(n)); match(Set mem (StoreNKlass mem src)); ins_cost(INSN_COST); @@ -6293,7 +7087,7 @@ %} instruct unnecessary_membar_acquire() %{ - predicate(! UseBarriersForVolatile && preceded_by_ordered_load(n)); + predicate(unnecessary_acquire(n)); match(MemBarAcquire); ins_cost(0); @@ -6345,6 +7139,19 @@ ins_pipe(pipe_serial); %} +instruct unnecessary_membar_release() %{ + predicate(unnecessary_release(n)); + match(MemBarRelease); + ins_cost(0); + + format %{ "membar_release (elided)" %} + + ins_encode %{ + __ block_comment("membar_release (elided)"); + %} + ins_pipe(pipe_serial); +%} + instruct membar_release() %{ match(MemBarRelease); ins_cost(VOLATILE_REF_COST); @@ -6382,6 +7189,20 @@ ins_pipe(pipe_serial); %} +instruct unnecessary_membar_volatile() %{ + predicate(unnecessary_volatile(n)); + match(MemBarVolatile); + ins_cost(0); + + format %{ "membar_volatile (elided)" %} + + ins_encode %{ + __ block_comment("membar_volatile (elided)"); + %} + + ins_pipe(pipe_serial); +%} + instruct membar_volatile() %{ match(MemBarVolatile); ins_cost(VOLATILE_REF_COST*100);
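For the elisions above to be safe, the stlr/ldar pair has to supply the ordering that the dropped dmb instructions would have supplied. A rough C++ analogue (again not HotSpot code) is that a sequentially consistent store on AArch64 is normally compiled to a bare stlr with no trailing barrier, because the architecture keeps a later load-acquire ordered after an earlier store-release:

    // analogy only -- C++11 seq_cst atomics, not HotSpot code
    #include <atomic>

    std::atomic<int> flag{0};

    void publish() {
      // normally a single stlr on AArch64, with no trailing dmb -- the same
      // reason the trailing MemBarVolatile can be elided once the store
      // itself is emitted as stlr
      flag.store(1, std::memory_order_seq_cst);
    }

    int observe() {
      // normally a single ldar; ldar is not reordered ahead of an earlier
      // stlr, so the store->load ordering the trailing dmb used to provide
      // is preserved
      return flag.load(std::memory_order_seq_cst);
    }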