
src/cpu/aarch64/vm/aarch64.ad

rev 8068 : fix volatile reads and writes on AArch64

*** 791,832 **** // count one adr and one far branch instruction return 4 * NativeInstruction::instruction_size; } }; ! bool preceded_by_ordered_load(const Node *barrier); // Use barrier instructions rather than load acquire / store // release. ! const bool UseBarriersForVolatile = true; %} source %{ ! // AArch64 has load acquire and store release instructions which we ! // use for ordered memory accesses, e.g. for volatiles. The ideal ! // graph generator also inserts memory barriers around volatile ! // accesses, and we don't want to generate both barriers and acq/rel ! // instructions. So, when we emit a MemBarAcquire we look back in ! // the ideal graph for an ordered load and only emit the barrier if ! // we don't find one. ! bool preceded_by_ordered_load(const Node *barrier) { ! Node *x = barrier->lookup(TypeFunc::Parms); ! if (! x) return false; if (x->is_DecodeNarrowPtr()) x = x->in(1); ! if (x->is_Load()) ! return ! x->as_Load()->is_unordered(); return false; } #define __ _masm. // advance declarations for helper functions to convert register // indices to register objects --- 791,1626 ---- // count one adr and one far branch instruction return 4 * NativeInstruction::instruction_size; } }; ! // graph traversal helpers ! MemBarNode *has_parent_membar(const Node *n, ! ProjNode *&ctl, ProjNode *&mem); ! MemBarNode *has_child_membar(const MemBarNode *n, ! ProjNode *&ctl, ProjNode *&mem); ! ! // predicates controlling emit of ldr<x>/ldar<x> and associated dmb ! bool unnecessary_acquire(const Node *barrier); ! bool needs_acquiring_load(const Node *load); ! ! // predicates controlling emit of str<x>/stlr<x> and associated dmbs ! bool unnecessary_release(const Node *barrier); ! bool unnecessary_volatile(const Node *barrier); ! bool needs_releasing_store(const Node *store); // Use barrier instructions rather than load acquire / store // release. ! const bool UseBarriersForVolatile = false; ! // Use barrier instructions for unsafe volatile gets rather than ! // trying to identify an exact signature for them ! const bool UseBarriersForUnsafeVolatileGet = false; %} source %{ ! // AArch64 has ldar<x> and stlr<x> instructions which we can safely ! // use to implement volatile reads and writes. For a volatile read ! // we simply need ! // ! // ldar<x> ! // ! // and for a volatile write we need ! // ! // stlr<x> ! // ! // Alternatively, we can implement them by pairing a normal ! // load/store with a memory barrier. For a volatile read we need ! // ! // ldr<x> ! // dmb ishld ! // ! // for a volatile write ! // ! // dmb ish ! // str<x> ! // dmb ish ! // ! // In order to generate the desired instruction sequence we need to ! // be able to identify specific 'signature' ideal graph node ! // sequences which i) occur as a translation of a volatile reads or ! // writes and ii) do not occur through any other translation or ! // graph transformation. We can then provide alternative aldc ! // matching rules which translate these node sequences to the ! // desired machine code sequences. Selection of the alternative ! // rules can be implemented by predicates which identify the ! // relevant node sequences. ! // ! // The ideal graph generator translates a volatile read to the node ! // sequence ! // ! // LoadX[mo_acquire] ! // MemBarAcquire ! // ! // As a special case when using the compressed oops optimization we ! // may also see this variant ! // ! // LoadN[mo_acquire] ! // DecodeN ! // MemBarAcquire ! // ! // A volatile write is translated to the node sequence ! // ! // MemBarRelease ! 
// StoreX[mo_release] ! // MemBarVolatile ! // ! // n.b. the above node patterns are generated with a strict ! // 'signature' configuration of input and output dependencies (see ! // the predicates below for exact details). The two signatures are ! // unique to translated volatile reads/stores -- they will not ! // appear as a result of any other bytecode translation or inlining ! // nor as a consequence of optimizing transforms. ! // ! // We also want to catch inlined unsafe volatile gets and puts and ! // be able to implement them using either ldar<x>/stlr<x> or some ! // combination of ldr<x>/stlr<x> and dmb instructions. ! // ! // Inlined unsafe volatiles puts manifest as a minor variant of the ! // normal volatile put node sequence containing an extra cpuorder ! // membar ! // ! // MemBarRelease ! // MemBarCPUOrder ! // StoreX[mo_release] ! // MemBarVolatile ! // ! // n.b. as an aside, the cpuorder membar is not itself subject to ! // matching and translation by adlc rules. However, the rule ! // predicates need to detect its presence in order to correctly ! // select the desired adlc rules. ! // ! // Inlined unsafe volatiles gets manifest as a somewhat different ! // node sequence to a normal volatile get ! // ! // MemBarCPUOrder ! // || \\ ! // MemBarAcquire LoadX[mo_acquire] ! // || ! // MemBarCPUOrder ! // ! // In this case the acquire membar does not directly depend on the ! // load. However, we can be sure that the load is generated from an ! // inlined unsafe volatile get if we see it dependent on this unique ! // sequence of membar nodes. Similarly, given an acquire membar we ! // can know that it was added because of an inlined unsafe volatile ! // get if it is fed and feeds a cpuorder membar and if its feed ! // membar also feeds an acquiring load. ! // ! // So, where we can identify these volatile read and write ! // signatures we can choose to plant either of the above two code ! // sequences. For a volatile read we can simply plant a normal ! // ldr<x> and translate the MemBarAcquire to a dmb. However, we can ! // also choose to inhibit translation of the MemBarAcquire and ! // inhibit planting of the ldr<x>, instead planting an ldar<x>. ! // ! // When we recognise a volatile store signature we can choose to ! // plant at a dmb ish as a translation for the MemBarRelease, a ! // normal str<x> and then a dmb ish for the MemBarVolatile. ! // Alternatively, we can inhibit translation of the MemBarRelease ! // and MemBarVolatile and instead plant a simple stlr<x> ! // instruction. ! // ! // Of course, the above only applies when we see these signature ! // configurations. We still want to plant dmb instructions in any ! // other cases where we may see a MemBarAcquire, MemBarRelease or ! // MemBarVolatile. For example, at the end of a constructor which ! // writes final/volatile fields we will see a MemBarRelease ! // instruction and this needs a 'dmb ish' lest we risk the ! // constructed object being visible without making the ! // final/volatile field writes visible. ! // ! // n.b. the translation rules below which rely on detection of the ! // volatile signatures and insert ldar<x> or stlr<x> are failsafe. ! // If we see anything other than the signature configurations we ! // always just translate the loads and stors to ldr<x> and str<x> ! // and translate acquire, release and volatile membars to the ! // relevant dmb instructions. ! // ! // n.b.b as a case in point for the above comment, the current ! // predicates don't detect the precise signature for certain types ! 
// of volatile object stores (where the heap_base input type is not ! // known at compile-time to be non-NULL). In those cases the ! // MemBarRelease and MemBarVolatile bracket an if-then-else sequence ! // with a store in each branch (we need a different store depending ! // on whether heap_base is actually NULL). In such a case we will ! // just plant a dmb both before and after the branch/merge. The ! // predicate could (and probably should) be fixed later to also ! // detect this case. ! ! // graph traversal helpers ! ! // if node n is linked to a parent MemBarNode by an intervening ! // Control or Memory ProjNode return the MemBarNode otherwise return ! // NULL. ! // ! // n may only be a Load or a MemBar. ! // ! // The ProjNode* references c and m are used to return the relevant ! // nodes. ! MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m) ! { ! Node *ctl = NULL; ! Node *mem = NULL; ! Node *membar = NULL; ! ! if (n->is_Load()) { ! ctl = n->lookup(LoadNode::Control); ! mem = n->lookup(LoadNode::Memory); ! } else if (n->is_MemBar()) { ! ctl = n->lookup(TypeFunc::Control); ! mem = n->lookup(TypeFunc::Memory); ! } else { ! return NULL; ! } ! ! if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) ! return NULL; ! c = ctl->as_Proj(); ! ! membar = ctl->lookup(0); ! ! if (!membar || !membar->is_MemBar()) ! return NULL; ! ! m = mem->as_Proj(); ! ! if (mem->lookup(0) != membar) ! return NULL; ! ! return membar->as_MemBar(); ! } ! ! // if n is linked to a child MemBarNode by intervening Control and ! // Memory ProjNodes return the MemBarNode otherwise return NULL. ! // ! // The ProjNode** arguments c and m are used to return pointers to ! // the relevant nodes. A null argument means don't don't return a ! // value. ! ! MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m) ! { ! ProjNode *ctl = n->proj_out(TypeFunc::Control); ! ProjNode *mem = n->proj_out(TypeFunc::Memory); ! ! // MemBar needs to have both a Ctl and Mem projection ! if (! ctl || ! mem) ! return NULL; ! ! c = ctl; ! m = mem; ! ! MemBarNode *child = NULL; ! Node *x; ! ! for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { ! x = ctl->fast_out(i); ! // if we see a membar we keep hold of it. we may also see a new ! // arena copy of the original but it will appear later ! if (x->is_MemBar()) { ! child = x->as_MemBar(); ! break; ! } ! } ! ! if (child == NULL) ! return NULL; ! ! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { ! x = mem->fast_out(i); ! // if we see a membar we keep hold of it. we may also see a new ! // arena copy of the original but it will appear later ! if (x == child) { ! return child; ! } ! } ! return NULL; ! } ! ! // predicates controlling emit of ldr<x>/ldar<x> and associated dmb ! ! bool unnecessary_acquire(const Node *barrier) { ! // assert barrier->is_MemBar(); ! if (UseBarriersForVolatile) ! // we need to plant a dmb return false; + // a volatile read derived from bytecode (or also from an inlined + // SHA field read via LibraryCallKit::load_field_from_object) + // manifests as a LoadX[mo_acquire] followed by an acquire membar + // with a bogus read dependency on it's preceding load. so in those + // cases we will find the load node at the PARMS offset of the + // acquire membar. n.b. there may be an intervening DecodeN node. + // + // a volatile load derived from an inlined unsafe field access + // manifests as a cpuorder membar with Ctl and Mem projections + // feeding both an acquire membar and a LoadX[mo_acquire]. 
The + // acquire then feeds another cpuorder membar via Ctl and Mem + // projections. The load has no output dependency on these trailing + // membars because subsequent nodes inserted into the graph take + // their control feed from the final membar cpuorder meaning they + // are all ordered after the load. + + Node *x = barrier->lookup(TypeFunc::Parms); + if (x) { + // we are starting from an acquire and it has a fake dependency + // + // need to check for + // + // LoadX[mo_acquire] + // { |1 } + // {DecodeN} + // |Parms + // MemBarAcquire* + // + // where * tags node we were passed + // and |k means input k if (x->is_DecodeNarrowPtr()) x = x->in(1); ! return (x->is_Load() && x->as_Load()->is_acquire()); ! } ! ! // only continue if we want to try to match unsafe volatile gets ! if (UseBarriersForUnsafeVolatileGet) ! return false; ! ! // need to check for ! // ! // MemBarCPUOrder ! // || \\ ! // MemBarAcquire* LoadX[mo_acquire] ! // || ! // MemBarCPUOrder ! // ! // where * tags node we were passed ! // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes ! ! // check for a parent MemBarCPUOrder ! ProjNode *ctl; ! ProjNode *mem; ! MemBarNode *parent = has_parent_membar(barrier, ctl, mem); ! if (!parent || parent->Opcode() != Op_MemBarCPUOrder) ! return false; ! // ensure the proj nodes both feed a LoadX[mo_acquire] ! LoadNode *ld = NULL; ! for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { ! x = ctl->fast_out(i); ! // if we see a load we keep hold of it and stop searching ! if (x->is_Load()) { ! ld = x->as_Load(); ! break; ! } ! } ! // it must be an acquiring load ! if (! ld || ! ld->is_acquire()) ! return false; ! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { ! x = mem->fast_out(i); ! // if we see the same load we drop it and stop searching ! if (x == ld) { ! ld = NULL; ! break; ! } ! } ! // we must have dropped the load ! if (ld) ! return false; ! // check for a child cpuorder membar ! MemBarNode *child = has_child_membar(barrier->as_MemBar(), ctl, mem); ! if (!child || child->Opcode() != Op_MemBarCPUOrder) ! return false; ! ! return true; ! } ! ! bool needs_acquiring_load(const Node *n) ! { ! // assert n->is_Load(); ! if (UseBarriersForVolatile) ! // we use a normal load and a dmb ! return false; ! ! LoadNode *ld = n->as_Load(); ! ! if (!ld->is_acquire()) ! return false; ! ! // check if this load is feeding an acquire membar ! // ! // LoadX[mo_acquire] ! // { |1 } ! // {DecodeN} ! // |Parms ! // MemBarAcquire* ! // ! // where * tags node we were passed ! // and |k means input k ! ! Node *start = ld; ! Node *mbacq = NULL; ! ! // if we hit a DecodeNarrowPtr we reset the start node and restart ! // the search through the outputs ! restart: ! ! for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) { ! Node *x = start->fast_out(i); ! if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) { ! mbacq = x; ! } else if (!mbacq && ! (x->is_DecodeNarrowPtr() || ! (x->is_Mach() && x->Opcode() == Op_DecodeN))) { ! start = x; ! goto restart; ! } ! } ! ! if (mbacq) { ! return true; ! } ! ! // only continue if we want to try to match unsafe volatile gets ! if (UseBarriersForUnsafeVolatileGet) ! return false; ! ! // check if Ctl and Proj feed comes from a MemBarCPUOrder ! // ! // MemBarCPUOrder ! // || \\ ! // MemBarAcquire* LoadX[mo_acquire] ! // || ! // MemBarCPUOrder ! ! MemBarNode *membar; ! ProjNode *ctl; ! ProjNode *mem; ! ! membar = has_parent_membar(ld, ctl, mem); ! ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) ! 
return false; ! ! // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain ! ! membar = has_child_membar(membar, ctl, mem); ! ! if (!membar || !membar->Opcode() == Op_MemBarAcquire) ! return false; ! ! membar = has_child_membar(membar, ctl, mem); ! ! if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) ! return false; ! ! return true; ! } ! ! bool unnecessary_release(const Node *n) { ! // assert n->is_MemBar(); ! if (UseBarriersForVolatile) ! // we need to plant a dmb ! return false; ! ! // ok, so we can omit this release barrier if it has been inserted ! // as part of a volatile store sequence ! // ! // MemBarRelease ! // { || } ! // {MemBarCPUOrder} -- optional ! // || \\ ! // || StoreX[mo_release] ! // | \ / ! // | MergeMem ! // | / ! // MemBarVolatile ! // ! // where ! // || and \\ represent Ctl and Mem feeds via Proj nodes ! // | \ and / indicate further routing of the Ctl and Mem feeds ! // ! // so we need to check that ! // ! // ia) the release membar (or its dependent cpuorder membar) feeds ! // control to a store node (via a Control project node) ! // ! // ii) the store is ordered release ! // ! // iii) the release membar (or its dependent cpuorder membar) feeds ! // control to a volatile membar (via the same Control project node) ! // ! // iv) the release membar feeds memory to a merge mem and to the ! // same store (both via a single Memory proj node) ! // ! // v) the store outputs to the merge mem ! // ! // vi) the merge mem outputs to the same volatile membar ! // ! // n.b. if this is an inlined unsafe node then the release membar ! // may feed its control and memory links via an intervening cpuorder ! // membar. this case can be dealt with when we check the release ! // membar projections. if they both feed a single cpuorder membar ! // node continue to make the same checks as above but with the ! // cpuorder membar substituted for the release membar. if they don't ! // both feed a cpuorder membar then the check fails. ! // ! // n.b.b. for an inlined unsafe store of an object in the case where ! // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see ! // an embedded if then else where we expect the store. this is ! // needed to do the right type of store depending on whether ! // heap_base is NULL. We could check for that but for now we can ! // just take the hit of on inserting a redundant dmb for this ! // redundant volatile membar ! ! MemBarNode *barrier = n->as_MemBar(); ! ProjNode *ctl; ! ProjNode *mem; ! // check for an intervening cpuorder membar ! MemBarNode *b = has_child_membar(barrier, ctl, mem); ! if (b && b->Opcode() == Op_MemBarCPUOrder) { ! // ok, so start form the dependent cpuorder barrier ! barrier = b; ! } ! // check the ctl and mem flow ! ctl = barrier->proj_out(TypeFunc::Control); ! mem = barrier->proj_out(TypeFunc::Memory); ! ! // the barrier needs to have both a Ctl and Mem projection ! if (! ctl || ! mem) ! return false; ! ! Node *x = NULL; ! Node *mbvol = NULL; ! StoreNode * st = NULL; ! ! // For a normal volatile write the Ctl ProjNode should have output ! // to a MemBarVolatile and a Store marked as releasing ! // ! // n.b. for an inlined unsafe store of an object in the case where ! // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see ! // an embedded if then else where we expect the store. this is ! // needed to do the right type of store depending on whether ! // heap_base is NULL. We could check for that case too but for now ! // we can just take the hit of inserting a dmb and a non-volatile ! 
// store to implement the volatile store ! ! for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { ! x = ctl->fast_out(i); ! if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { ! if (mbvol) { ! return false; ! } ! mbvol = x; ! } else if (x->is_Store()) { ! st = x->as_Store(); ! if (! st->is_release()) { ! return false; ! } ! } else if (!x->is_Mach()) { ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! if (!mbvol || !st) ! return false; ! ! // the Mem ProjNode should output to a MergeMem and the same Store ! Node *mm = NULL; ! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { ! x = mem->fast_out(i); ! if (!mm && x->is_MergeMem()) { ! mm = x; ! } else if (x != st && !x->is_Mach()) { ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! if (!mm) ! return false; ! ! // the MergeMem should output to the MemBarVolatile ! for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { ! x = mm->fast_out(i); ! if (x != mbvol && !x->is_Mach()) { ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! return true; ! } ! ! bool unnecessary_volatile(const Node *n) { ! // assert n->is_MemBar(); ! if (UseBarriersForVolatile) ! // we need to plant a dmb ! return false; ! ! // ok, so we can omit this volatile barrier if it has been inserted ! // as part of a volatile store sequence ! // ! // MemBarRelease ! // { || } ! // {MemBarCPUOrder} -- optional ! // || \\ ! // || StoreX[mo_release] ! // | \ / ! // | MergeMem ! // | / ! // MemBarVolatile ! // ! // where ! // || and \\ represent Ctl and Mem feeds via Proj nodes ! // | \ and / indicate further routing of the Ctl and Mem feeds ! // ! // we need to check that ! // ! // i) the volatile membar gets its control feed from a release ! // membar (or its dependent cpuorder membar) via a Control project ! // node ! // ! // ii) the release membar (or its dependent cpuorder membar) also ! // feeds control to a store node via the same proj node ! // ! // iii) the store is ordered release ! // ! // iv) the release membar (or its dependent cpuorder membar) feeds ! // memory to a merge mem and to the same store (both via a single ! // Memory proj node) ! // ! // v) the store outputs to the merge mem ! // ! // vi) the merge mem outputs to the volatile membar ! // ! // n.b. for an inlined unsafe store of an object in the case where ! // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see ! // an embedded if then else where we expect the store. this is ! // needed to do the right type of store depending on whether ! // heap_base is NULL. We could check for that but for now we can ! // just take the hit of on inserting a redundant dmb for this ! // redundant volatile membar ! ! MemBarNode *mbvol = n->as_MemBar(); ! Node *x = n->lookup(TypeFunc::Control); ! ! if (! x || !x->is_Proj()) ! return false; ! ! ProjNode *proj = x->as_Proj(); ! ! x = proj->lookup(0); ! ! if (!x || !x->is_MemBar()) ! return false; ! ! MemBarNode *barrier = x->as_MemBar(); ! ! // if the barrier is a release membar we have what we want. if it is ! // a cpuorder membar then we need to ensure that it is fed by a ! // release membar in which case we proceed to check the graph below ! // this cpuorder membar as the feed ! ! if (x->Opcode() != Op_MemBarRelease) { ! if (x->Opcode() != Op_MemBarCPUOrder) ! return false; ! ProjNode *ctl; ! ProjNode *mem; ! MemBarNode *b = has_parent_membar(x, ctl, mem); ! 
if (!b || !b->Opcode() == Op_MemBarRelease) ! return false; ! } ! ! ProjNode *ctl = barrier->proj_out(TypeFunc::Control); ! ProjNode *mem = barrier->proj_out(TypeFunc::Memory); ! ! // barrier needs to have both a Ctl and Mem projection ! // and we need to have reached it via the Ctl projection ! if (! ctl || ! mem || ctl != proj) ! return false; ! ! StoreNode * st = NULL; ! ! // The Ctl ProjNode should have output to a MemBarVolatile and ! // a Store marked as releasing ! for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { ! x = ctl->fast_out(i); ! if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { ! if (x != mbvol) { ! return false; ! } ! } else if (x->is_Store()) { ! st = x->as_Store(); ! if (! st->is_release()) { ! return false; ! } ! } else if (!x->is_Mach()){ ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! if (!st) ! return false; ! ! // the Mem ProjNode should output to a MergeMem and the same Store ! Node *mm = NULL; ! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { ! x = mem->fast_out(i); ! if (!mm && x->is_MergeMem()) { ! mm = x; ! } else if (x != st && !x->is_Mach()) { ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! if (!mm) ! return false; ! ! // the MergeMem should output to the MemBarVolatile ! for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { ! x = mm->fast_out(i); ! if (x != mbvol && !x->is_Mach()) { ! // we may see mach nodes added during matching but nothing else ! return false; ! } ! } ! ! return true; ! } ! ! ! ! bool needs_releasing_store(const Node *n) ! { ! // assert n->is_Store(); ! if (UseBarriersForVolatile) ! // we use a normal store and dmb combination ! return false; ! ! StoreNode *st = n->as_Store(); ! ! if (!st->is_release()) ! return false; ! ! // check if this store is bracketed by a release (or its dependent ! // cpuorder membar) and a volatile membar ! // ! // MemBarRelease ! // { || } ! // {MemBarCPUOrder} -- optional ! // || \\ ! // || StoreX[mo_release] ! // | \ / ! // | MergeMem ! // | / ! // MemBarVolatile ! // ! // where ! // || and \\ represent Ctl and Mem feeds via Proj nodes ! // | \ and / indicate further routing of the Ctl and Mem feeds ! // ! ! ! Node *x = st->lookup(TypeFunc::Control); ! ! if (! x || !x->is_Proj()) ! return false; ! ! ProjNode *proj = x->as_Proj(); ! ! x = proj->lookup(0); ! ! if (!x || !x->is_MemBar()) ! return false; ! ! MemBarNode *barrier = x->as_MemBar(); ! ! // if the barrier is a release membar we have what we want. if it is ! // a cpuorder membar then we need to ensure that it is fed by a ! // release membar in which case we proceed to check the graph below ! // this cpuorder membar as the feed ! ! if (x->Opcode() != Op_MemBarRelease) { ! if (x->Opcode() != Op_MemBarCPUOrder) ! return false; ! Node *ctl = x->lookup(TypeFunc::Control); ! Node *mem = x->lookup(TypeFunc::Memory); ! if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj()) ! return false; ! x = ctl->lookup(0); ! if (!x || !x->is_MemBar() || !x->Opcode() == Op_MemBarRelease) ! return false; ! Node *y = mem->lookup(0); ! if (!y || y != x) ! return false; ! } ! ! ProjNode *ctl = barrier->proj_out(TypeFunc::Control); ! ProjNode *mem = barrier->proj_out(TypeFunc::Memory); ! ! // MemBarRelease needs to have both a Ctl and Mem projection ! // and we need to have reached it via the Ctl projection ! if (! ctl || ! mem || ctl != proj) ! return false; ! ! MemBarNode *mbvol = NULL; ! ! 
// The Ctl ProjNode should have output to a MemBarVolatile and ! // a Store marked as releasing ! for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { ! x = ctl->fast_out(i); ! if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { ! mbvol = x->as_MemBar(); ! } else if (x->is_Store()) { ! if (x != st) { ! return false; ! } ! } else if (!x->is_Mach()){ ! return false; ! } ! } ! ! if (!mbvol) ! return false; ! ! // the Mem ProjNode should output to a MergeMem and the same Store ! Node *mm = NULL; ! for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { ! x = mem->fast_out(i); ! if (!mm && x->is_MergeMem()) { ! mm = x; ! } else if (x != st && !x->is_Mach()) { ! return false; ! } ! } ! ! if (!mm) ! return false; + // the MergeMem should output to the MemBarVolatile + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x != mbvol && !x->is_Mach()) { return false; + } + } + + return true; } + + #define __ _masm. // advance declarations for helper functions to convert register // indices to register objects
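[Editorial aside, not part of the webrev: a minimal Java sketch of the plain volatile field accesses the comment block above describes. The node sequences and the alternative AArch64 encodings in the comments are copied from that explanation; the class and field names are invented for illustration.]

class VolatileExample {
  volatile long v;

  long read() {
    // ideal graph: LoadL[mo_acquire] feeding a MemBarAcquire
    // emitted either as   ldar<x>
    // or as               ldr<x> ; dmb ishld
    return v;
  }

  void write(long x) {
    // ideal graph: MemBarRelease, StoreL[mo_release], MemBarVolatile
    // emitted either as   stlr<x>
    // or as               dmb ish ; str<x> ; dmb ish
    v = x;
  }
}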
*** 5149,5159 **** // Load Byte (8 bit signed) instruct loadB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadB mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsbw(dst, mem)); --- 5943,5953 ---- // Load Byte (8 bit signed) instruct loadB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadB mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrsbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsbw(dst, mem));
*** 5163,5173 **** // Load Byte (8 bit signed) into long instruct loadB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadB mem))); ! predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsb(dst, mem)); --- 5957,5967 ---- // Load Byte (8 bit signed) into long instruct loadB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadB mem))); ! predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrsb(dst, mem));
*** 5177,5187 **** // Load Byte (8 bit unsigned) instruct loadUB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUB mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem)); --- 5971,5981 ---- // Load Byte (8 bit unsigned) instruct loadUB(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUB mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrbw $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem));
*** 5191,5201 **** // Load Byte (8 bit unsigned) into long instruct loadUB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUB mem))); ! predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem)); --- 5985,5995 ---- // Load Byte (8 bit unsigned) into long instruct loadUB2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUB mem))); ! predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrb $dst, $mem\t# byte" %} ins_encode(aarch64_enc_ldrb(dst, mem));
*** 5205,5215 **** // Load Short (16 bit signed) instruct loadS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadS mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrshw $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrshw(dst, mem)); --- 5999,6009 ---- // Load Short (16 bit signed) instruct loadS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadS mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrshw $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrshw(dst, mem));
*** 5219,5229 **** // Load Short (16 bit signed) into long instruct loadS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadS mem))); ! predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrsh(dst, mem)); --- 6013,6023 ---- // Load Short (16 bit signed) into long instruct loadS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadS mem))); ! predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrsh(dst, mem));
*** 5233,5243 **** // Load Char (16 bit unsigned) instruct loadUS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUS mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem)); --- 6027,6037 ---- // Load Char (16 bit unsigned) instruct loadUS(iRegINoSp dst, memory mem) %{ match(Set dst (LoadUS mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem));
*** 5247,5257 **** // Load Short/Char (16 bit unsigned) into long instruct loadUS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUS mem))); ! predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem)); --- 6041,6051 ---- // Load Short/Char (16 bit unsigned) into long instruct loadUS2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadUS mem))); ! predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrh $dst, $mem\t# short" %} ins_encode(aarch64_enc_ldrh(dst, mem));
*** 5261,5271 **** // Load Integer (32 bit signed) instruct loadI(iRegINoSp dst, memory mem) %{ match(Set dst (LoadI mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem)); --- 6055,6065 ---- // Load Integer (32 bit signed) instruct loadI(iRegINoSp dst, memory mem) %{ match(Set dst (LoadI mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem));
*** 5275,5285 **** // Load Integer (32 bit signed) into long instruct loadI2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadI mem))); ! predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrsw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrsw(dst, mem)); --- 6069,6079 ---- // Load Integer (32 bit signed) into long instruct loadI2L(iRegLNoSp dst, memory mem) %{ match(Set dst (ConvI2L (LoadI mem))); ! predicate(!needs_acquiring_load(n->in(1))); ins_cost(4 * INSN_COST); format %{ "ldrsw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrsw(dst, mem));
*** 5289,5299 **** // Load Integer (32 bit unsigned) into long instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ! predicate(UseBarriersForVolatile || n->in(1)->in(1)->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem)); --- 6083,6093 ---- // Load Integer (32 bit unsigned) into long instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ! predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load())); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldrw(dst, mem));
*** 5303,5313 **** // Load Long (64 bit signed) instruct loadL(iRegLNoSp dst, memory mem) %{ match(Set dst (LoadL mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldr(dst, mem)); --- 6097,6107 ---- // Load Long (64 bit signed) instruct loadL(iRegLNoSp dst, memory mem) %{ match(Set dst (LoadL mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# int" %} ins_encode(aarch64_enc_ldr(dst, mem));
*** 5330,5340 **** // Load Pointer instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} ins_encode(aarch64_enc_ldr(dst, mem)); --- 6124,6134 ---- // Load Pointer instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} ins_encode(aarch64_enc_ldr(dst, mem));
*** 5344,5354 **** // Load Compressed Pointer instruct loadN(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadN mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem)); --- 6138,6148 ---- // Load Compressed Pointer instruct loadN(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadN mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem));
*** 5358,5368 **** // Load Klass Pointer instruct loadKlass(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadKlass mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# class" %} ins_encode(aarch64_enc_ldr(dst, mem)); --- 6152,6162 ---- // Load Klass Pointer instruct loadKlass(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadKlass mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# class" %} ins_encode(aarch64_enc_ldr(dst, mem));
*** 5372,5382 **** // Load Narrow Klass Pointer instruct loadNKlass(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadNKlass mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed class ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem)); --- 6166,6176 ---- // Load Narrow Klass Pointer instruct loadNKlass(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadNKlass mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed class ptr" %} ins_encode(aarch64_enc_ldrw(dst, mem));
*** 5386,5396 **** // Load Float instruct loadF(vRegF dst, memory mem) %{ match(Set dst (LoadF mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrs $dst, $mem\t# float" %} ins_encode( aarch64_enc_ldrs(dst, mem) ); --- 6180,6190 ---- // Load Float instruct loadF(vRegF dst, memory mem) %{ match(Set dst (LoadF mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrs $dst, $mem\t# float" %} ins_encode( aarch64_enc_ldrs(dst, mem) );
*** 5400,5410 **** // Load Double instruct loadD(vRegD dst, memory mem) %{ match(Set dst (LoadD mem)); ! predicate(UseBarriersForVolatile || n->as_Load()->is_unordered()); ins_cost(4 * INSN_COST); format %{ "ldrd $dst, $mem\t# double" %} ins_encode( aarch64_enc_ldrd(dst, mem) ); --- 6194,6204 ---- // Load Double instruct loadD(vRegD dst, memory mem) %{ match(Set dst (LoadD mem)); ! predicate(!needs_acquiring_load(n)); ins_cost(4 * INSN_COST); format %{ "ldrd $dst, $mem\t# double" %} ins_encode( aarch64_enc_ldrd(dst, mem) );
*** 5631,5641 **** // Store Byte instruct storeB(iRegIorL2I src, memory mem) %{ match(Set mem (StoreB mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strb $src, $mem\t# byte" %} ins_encode(aarch64_enc_strb(src, mem)); --- 6425,6435 ---- // Store Byte instruct storeB(iRegIorL2I src, memory mem) %{ match(Set mem (StoreB mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strb $src, $mem\t# byte" %} ins_encode(aarch64_enc_strb(src, mem));
*** 5645,5655 **** instruct storeimmB0(immI0 zero, memory mem) %{ match(Set mem (StoreB mem zero)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strb zr, $mem\t# byte" %} ins_encode(aarch64_enc_strb0(mem)); --- 6439,6449 ---- instruct storeimmB0(immI0 zero, memory mem) %{ match(Set mem (StoreB mem zero)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strb zr, $mem\t# byte" %} ins_encode(aarch64_enc_strb0(mem));
*** 5659,5669 **** // Store Char/Short instruct storeC(iRegIorL2I src, memory mem) %{ match(Set mem (StoreC mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strh $src, $mem\t# short" %} ins_encode(aarch64_enc_strh(src, mem)); --- 6453,6463 ---- // Store Char/Short instruct storeC(iRegIorL2I src, memory mem) %{ match(Set mem (StoreC mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strh $src, $mem\t# short" %} ins_encode(aarch64_enc_strh(src, mem));
*** 5672,5682 **** %} instruct storeimmC0(immI0 zero, memory mem) %{ match(Set mem (StoreC mem zero)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strh zr, $mem\t# short" %} ins_encode(aarch64_enc_strh0(mem)); --- 6466,6476 ---- %} instruct storeimmC0(immI0 zero, memory mem) %{ match(Set mem (StoreC mem zero)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strh zr, $mem\t# short" %} ins_encode(aarch64_enc_strh0(mem));
*** 5687,5697 **** // Store Integer instruct storeI(iRegIorL2I src, memory mem) %{ match(Set mem(StoreI mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# int" %} ins_encode(aarch64_enc_strw(src, mem)); --- 6481,6491 ---- // Store Integer instruct storeI(iRegIorL2I src, memory mem) %{ match(Set mem(StoreI mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# int" %} ins_encode(aarch64_enc_strw(src, mem));
*** 5700,5710 **** %} instruct storeimmI0(immI0 zero, memory mem) %{ match(Set mem(StoreI mem zero)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# int" %} ins_encode(aarch64_enc_strw0(mem)); --- 6494,6504 ---- %} instruct storeimmI0(immI0 zero, memory mem) %{ match(Set mem(StoreI mem zero)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# int" %} ins_encode(aarch64_enc_strw0(mem));
*** 5714,5724 **** // Store Long (64 bit signed) instruct storeL(iRegL src, memory mem) %{ match(Set mem (StoreL mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str $src, $mem\t# int" %} ins_encode(aarch64_enc_str(src, mem)); --- 6508,6518 ---- // Store Long (64 bit signed) instruct storeL(iRegL src, memory mem) %{ match(Set mem (StoreL mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str $src, $mem\t# int" %} ins_encode(aarch64_enc_str(src, mem));
*** 5728,5738 **** // Store Long (64 bit signed) instruct storeimmL0(immL0 zero, memory mem) %{ match(Set mem (StoreL mem zero)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str zr, $mem\t# int" %} ins_encode(aarch64_enc_str0(mem)); --- 6522,6532 ---- // Store Long (64 bit signed) instruct storeimmL0(immL0 zero, memory mem) %{ match(Set mem (StoreL mem zero)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str zr, $mem\t# int" %} ins_encode(aarch64_enc_str0(mem));
*** 5742,5752 **** // Store Pointer instruct storeP(iRegP src, memory mem) %{ match(Set mem (StoreP mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str $src, $mem\t# ptr" %} ins_encode(aarch64_enc_str(src, mem)); --- 6536,6546 ---- // Store Pointer instruct storeP(iRegP src, memory mem) %{ match(Set mem (StoreP mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str $src, $mem\t# ptr" %} ins_encode(aarch64_enc_str(src, mem));
*** 5756,5766 **** // Store Pointer instruct storeimmP0(immP0 zero, memory mem) %{ match(Set mem (StoreP mem zero)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "str zr, $mem\t# ptr" %} ins_encode(aarch64_enc_str0(mem)); --- 6550,6560 ---- // Store Pointer instruct storeimmP0(immP0 zero, memory mem) %{ match(Set mem (StoreP mem zero)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "str zr, $mem\t# ptr" %} ins_encode(aarch64_enc_str0(mem));
*** 5770,5780 **** // Store Compressed Pointer instruct storeN(iRegN src, memory mem) %{ match(Set mem (StoreN mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_strw(src, mem)); --- 6564,6574 ---- // Store Compressed Pointer instruct storeN(iRegN src, memory mem) %{ match(Set mem (StoreN mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} ins_encode(aarch64_enc_strw(src, mem));
*** 5785,5795 **** instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL && ! (UseBarriersForVolatile || n->as_Store()->is_unordered())); ins_cost(INSN_COST); format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %} ins_encode(aarch64_enc_strw(heapbase, mem)); --- 6579,6589 ---- instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL && ! (!needs_releasing_store(n))); ins_cost(INSN_COST); format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %} ins_encode(aarch64_enc_strw(heapbase, mem));
*** 5799,5809 **** // Store Float instruct storeF(vRegF src, memory mem) %{ match(Set mem (StoreF mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strs $src, $mem\t# float" %} ins_encode( aarch64_enc_strs(src, mem) ); --- 6593,6603 ---- // Store Float instruct storeF(vRegF src, memory mem) %{ match(Set mem (StoreF mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strs $src, $mem\t# float" %} ins_encode( aarch64_enc_strs(src, mem) );
*** 5816,5826 **** // Store Double instruct storeD(vRegD src, memory mem) %{ match(Set mem (StoreD mem src)); ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); ins_cost(INSN_COST); format %{ "strd $src, $mem\t# double" %} ins_encode( aarch64_enc_strd(src, mem) ); --- 6610,6620 ---- // Store Double instruct storeD(vRegD src, memory mem) %{ match(Set mem (StoreD mem src)); ! predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); format %{ "strd $src, $mem\t# double" %} ins_encode( aarch64_enc_strd(src, mem) );
*** 5829,5839 **** %} // Store Compressed Klass Pointer instruct storeNKlass(iRegN src, memory mem) %{ ! predicate(UseBarriersForVolatile || n->as_Store()->is_unordered()); match(Set mem (StoreNKlass mem src)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed klass ptr" %} --- 6623,6633 ---- %} // Store Compressed Klass Pointer instruct storeNKlass(iRegN src, memory mem) %{ ! predicate(!needs_releasing_store(n)); match(Set mem (StoreNKlass mem src)); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed klass ptr" %}
*** 6291,6301 **** %} ins_pipe(pipe_serial); %} instruct unnecessary_membar_acquire() %{ ! predicate(! UseBarriersForVolatile && preceded_by_ordered_load(n)); match(MemBarAcquire); ins_cost(0); format %{ "membar_acquire (elided)" %} --- 7085,7095 ---- %} ins_pipe(pipe_serial); %} instruct unnecessary_membar_acquire() %{ ! predicate(unnecessary_acquire(n)); match(MemBarAcquire); ins_cost(0); format %{ "membar_acquire (elided)" %}
*** 6343,6352 **** --- 7137,7159 ---- __ membar(Assembler::LoadStore|Assembler::StoreStore); %} ins_pipe(pipe_serial); %} + instruct unnecessary_membar_release() %{ + predicate(unnecessary_release(n)); + match(MemBarRelease); + ins_cost(0); + + format %{ "membar_release (elided)" %} + + ins_encode %{ + __ block_comment("membar_release (elided)"); + %} + ins_pipe(pipe_serial); + %} + instruct membar_release() %{ match(MemBarRelease); ins_cost(VOLATILE_REF_COST); format %{ "membar_release" %}
*** 6380,6389 **** --- 7187,7210 ---- %} ins_pipe(pipe_serial); %} + instruct unnecessary_membar_volatile() %{ + predicate(unnecessary_volatile(n)); + match(MemBarVolatile); + ins_cost(0); + + format %{ "membar_volatile (elided)" %} + + ins_encode %{ + __ block_comment("membar_volatile (elided)"); + %} + + ins_pipe(pipe_serial); + %} + instruct membar_volatile() %{ match(MemBarVolatile); ins_cost(VOLATILE_REF_COST*100); format %{ "membar_volatile" %}
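[Editorial aside, not part of the webrev: a hedged Java sketch of the inlined unsafe volatile get/put case that the new unnecessary_membar_* rules and their predicates are intended to recognise. getLongVolatile, putLongVolatile and objectFieldOffset are the standard sun.misc.Unsafe methods; the reflection boilerplate and the names used here are only illustrative.]

import java.lang.reflect.Field;
import sun.misc.Unsafe;

class UnsafeVolatileExample {
  static final Unsafe U;
  static final long V_OFFSET;
  long v;

  static {
    try {
      Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
      theUnsafe.setAccessible(true);
      U = (Unsafe) theUnsafe.get(null);
      V_OFFSET = U.objectFieldOffset(UnsafeVolatileExample.class.getDeclaredField("v"));
    } catch (Exception e) {
      throw new Error(e);
    }
  }

  long get() {
    // when intrinsified this manifests as a MemBarCPUOrder feeding both a
    // MemBarAcquire and a LoadL[mo_acquire], with a trailing MemBarCPUOrder --
    // the signature unnecessary_acquire/needs_acquiring_load check for
    // (unless UseBarriersForUnsafeVolatileGet forces the dmb form)
    return U.getLongVolatile(this, V_OFFSET);
  }

  void put(long x) {
    // when intrinsified this manifests as MemBarRelease, MemBarCPUOrder,
    // StoreL[mo_release], MemBarVolatile -- the variant handled by
    // unnecessary_release, unnecessary_volatile and needs_releasing_store
    U.putLongVolatile(this, V_OFFSET, x);
  }
}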