--- old/src/share/vm/opto/library_call.cpp 2017-04-25 16:45:34.003173137 +0200 +++ new/src/share/vm/opto/library_call.cpp 2017-04-25 16:45:33.871173142 +0200 @@ -240,10 +240,6 @@ // This returns Type::AnyPtr, RawPtr, or OopPtr. int classify_unsafe_addr(Node* &base, Node* &offset); Node* make_unsafe_address(Node* base, Node* offset); - // Helper for inline_unsafe_access. - // Generates the guards that check whether the result of - // Unsafe.getObject should be recorded in an SATB log buffer. - void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind; bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned); @@ -265,7 +261,7 @@ bool inline_array_copyOf(bool is_copyOfRange); bool inline_array_equals(StrIntrinsicNode::ArgEnc ae); bool inline_preconditions_checkIndex(); - void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark); + void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array); bool inline_native_clone(bool is_virtual); bool inline_native_Reflection_getCallerClass(); // Helper function for inlining native object hash method @@ -2151,106 +2147,6 @@ //----------------------------inline_unsafe_access---------------------------- -// Helper that guards and inserts a pre-barrier. -void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) { - // We could be accessing the referent field of a reference object. If so, when G1 - // is enabled, we need to log the value in the referent field in an SATB buffer. - // This routine performs some compile time filters and generates suitable - // runtime filters that guard the pre-barrier code. - // Also add memory barrier for non volatile load from the referent field - // to prevent commoning of loads across safepoint. - if (!UseG1GC && !need_mem_bar) - return; - - // Some compile time checks. - - // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? - const TypeX* otype = offset->find_intptr_t_type(); - if (otype != NULL && otype->is_con() && - otype->get_con() != java_lang_ref_Reference::referent_offset) { - // Constant offset but not the reference_offset so just return - return; - } - - // We only need to generate the runtime guards for instances. - const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); - if (btype != NULL) { - if (btype->isa_aryptr()) { - // Array type so nothing to do - return; - } - - const TypeInstPtr* itype = btype->isa_instptr(); - if (itype != NULL) { - // Can the klass of base_oop be statically determined to be - // _not_ a sub-class of Reference and _not_ Object? - ciKlass* klass = itype->klass(); - if ( klass->is_loaded() && - !klass->is_subtype_of(env()->Reference_klass()) && - !env()->Object_klass()->is_subtype_of(klass)) { - return; - } - } - } - - // The compile time filters did not reject base_oop/offset so - // we need to generate the following runtime filters - // - // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); - // } - // } - - float likely = PROB_LIKELY( 0.999); - float unlikely = PROB_UNLIKELY(0.999); - - IdealKit ideal(this); -#define __ ideal. 
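// The helper being removed here only emitted its runtime guards when the compile-time
// checks above could not prove that the access is not a Reference.referent load. A
// minimal standalone sketch of that decision (illustrative types and names only, not
// HotSpot API; assumes the caller supplies whatever static type info C2 had):
#include <cstdint>

struct StaticKlassInfo {
  bool known;                    // do we have a static receiver type at all?
  bool is_array;                 // arrays never hold a referent field
  bool loaded;
  bool is_subtype_of_reference;  // receiver <: java.lang.ref.Reference
  bool object_is_subtype;        // j.l.Object <: receiver (receiver could still be a Reference)
};

// Returns true when the runtime filters
//   offset == referent_offset && base instanceof Reference
// still have to guard the SATB pre-barrier.
static bool needs_referent_guards(bool offset_is_constant, intptr_t offset,
                                  intptr_t referent_offset,
                                  const StaticKlassInfo& k) {
  if (offset_is_constant && offset != referent_offset) {
    return false;                // constant offset of some other field
  }
  if (k.known) {
    if (k.is_array) return false;
    if (k.loaded && !k.is_subtype_of_reference && !k.object_is_subtype) {
      return false;              // provably neither Reference nor plain Object
    }
  }
  return true;                   // cannot rule it out statically: guard at runtime
}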
- - Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); - - __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - // Update graphKit memory and control from IdealKit. - sync_kit(ideal); - - Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass())); - Node* is_instof = gen_instanceof(base_oop, ref_klass_con); - - // Update IdealKit memory and control from graphKit. - __ sync_kit(this); - - Node* one = __ ConI(1); - // is_instof == 0 if base_oop == NULL - __ if_then(is_instof, BoolTest::eq, one, unlikely); { - - // Update graphKit from IdeakKit. - sync_kit(ideal); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(false /* do_load */, - __ ctrl(), - NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, - pre_val /* pre_val */, - T_OBJECT); - if (need_mem_bar) { - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - insert_mem_bar(Op_MemBarCPUOrder); - } - // Update IdealKit from graphKit. - __ sync_kit(this); - - } __ end_if(); // _ref_type != ref_none - } __ end_if(); // offset == referent_offset - - // Final sync IdealKit and GraphKit. - final_sync(ideal); -#undef __ -} - - const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type) { // Attempt to infer a sharper value type from the offset and base type. ciKlass* sharpened_klass = NULL; @@ -2291,10 +2187,15 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) { if (callee()->is_static()) return false; // caller must have the capability! + C2DecoratorSet decorators = C2_ACCESS_ON_ANONYMOUS; guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads"); guarantee( is_store || kind != Release, "Release accesses can be produced only for stores"); assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type"); + if (unaligned) { + decorators |= C2_ACCESS_UNALIGNED; + } + #ifndef PRODUCT { ResourceMark rm; @@ -2352,6 +2253,10 @@ // Can base be NULL? Otherwise, always on-heap access. bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop)); + if (!can_access_non_heap) { + decorators |= C2_ACCESS_ON_HEAP; + } + val = is_store ? argument(4) : NULL; const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); @@ -2389,65 +2294,57 @@ assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched"); + if (mismatched) { + decorators |= C2_MISMATCHED; + } + // First guess at the value type. const Type *value_type = Type::get_const_basic_type(type); - // We will need memory barriers unless we can determine a unique - // alias category for this reference. (Note: If for some reason - // the barriers get omitted and the unsafe reference begins to "pollute" - // the alias analysis of the rest of the graph, either Compile::can_alias - // or Compile::must_alias will throw a diagnostic assert.) - bool need_mem_bar = false; - switch (kind) { - case Relaxed: - need_mem_bar = mismatched && !adr_type->isa_aryptr(); - break; - case Opaque: - // Opaque uses CPUOrder membars for protection against code movement. - case Acquire: - case Release: - case Volatile: - need_mem_bar = true; - break; - default: - ShouldNotReachHere(); - } - - // Some accesses require access atomicity for all types, notably longs and doubles. 
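// The new code threads a C2DecoratorSet through the intrinsic instead of making
// barrier and membar decisions at every call site: the intrinsic only describes the
// access (on-heap, unaligned, mismatched, memory order) and the barrier-set code
// generator interprets the bits. A minimal sketch of the pattern with made-up flag
// names and values (not the actual C2_* decorator constants):
#include <cstdint>
typedef uint64_t DecoratorSetSketch;
const DecoratorSetSketch SK_ON_HEAP     = uint64_t(1) << 0;
const DecoratorSetSketch SK_UNALIGNED   = uint64_t(1) << 1;
const DecoratorSetSketch SK_MISMATCHED  = uint64_t(1) << 2;
const DecoratorSetSketch SK_MO_RELAXED  = uint64_t(1) << 3;
const DecoratorSetSketch SK_MO_VOLATILE = uint64_t(1) << 4;

static DecoratorSetSketch describe_unsafe_access(bool on_heap, bool unaligned,
                                                 bool mismatched, bool is_volatile) {
  DecoratorSetSketch d = is_volatile ? SK_MO_VOLATILE : SK_MO_RELAXED;
  if (on_heap)    d |= SK_ON_HEAP;     // cf. C2_ACCESS_ON_HEAP above
  if (unaligned)  d |= SK_UNALIGNED;   // cf. C2_ACCESS_UNALIGNED
  if (mismatched) d |= SK_MISMATCHED;  // cf. C2_MISMATCHED
  return d;                            // consumed by access_load_at() / access_store_at()
}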
- // When AlwaysAtomicAccesses is enabled, all accesses are atomic. - bool requires_atomic_access = false; switch (kind) { case Relaxed: - requires_atomic_access = AlwaysAtomicAccesses; - break; + decorators |= C2_MO_RELAXED; + break; case Opaque: - // Opaque accesses are atomic. + decorators |= C2_ACCESS_ATOMIC; + break; case Acquire: + decorators |= C2_MO_ACQUIRE; + break; case Release: + decorators |= C2_MO_RELEASE; + break; case Volatile: - requires_atomic_access = true; - break; + decorators |= C2_MO_VOLATILE; + break; default: - ShouldNotReachHere(); + ShouldNotReachHere(); } - // Figure out the memory ordering. - // Acquire/Release/Volatile accesses require marking the loads/stores with MemOrd - MemNode::MemOrd mo = access_kind_to_memord_LS(kind, is_store); - - // If we are reading the value of the referent field of a Reference - // object (either by using Unsafe directly or through reflection) - // then, if G1 is enabled, we need to record the referent in an - // SATB log buffer using the pre-barrier mechanism. - // Also we need to add memory barrier to prevent commoning reads - // from this field across safepoint since GC can change its value. - bool need_read_barrier = !is_store && - offset != top() && heap_base_oop != top(); - - if (!is_store && type == T_OBJECT) { - const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type); - if (tjp != NULL) { - value_type = tjp; + if (type == T_OBJECT) { + if (!is_store) { + const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type); + if (tjp != NULL) { + value_type = tjp; + } + } else { + Compile::AliasType* at = C->alias_type(adr_type); + if (adr_type->isa_instptr()) { + if (at->field() != NULL) { + // known field. This code is a copy of the do_put_xxx logic. + ciField* field = at->field(); + if (!field->type()->is_loaded()) { + value_type = TypeInstPtr::BOTTOM; + } else { + value_type = TypeOopPtr::make_from_klass(field->type()->as_klass()); + } + } + } else if (adr_type->isa_aryptr()) { + value_type = adr_type->is_aryptr()->elem()->make_oopptr(); + } + if (value_type == NULL) { + value_type = TypeInstPtr::BOTTOM; + } } } @@ -2460,39 +2357,6 @@ // and it is not possible to fully distinguish unintended nulls // from intended ones in this API. - // We need to emit leading and trailing CPU membars (see below) in - // addition to memory membars for special access modes. This is a little - // too strong, but avoids the need to insert per-alias-type - // volatile membars (for stores; compare Parse::do_put_xxx), which - // we cannot do effectively here because we probably only have a - // rough approximation of type. - - switch(kind) { - case Relaxed: - case Opaque: - case Acquire: - break; - case Release: - case Volatile: - if (is_store) { - insert_mem_bar(Op_MemBarRelease); - } else { - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } - } - break; - default: - ShouldNotReachHere(); - } - - // Memory barrier to prevent normal and 'unsafe' accesses from - // bypassing each other. Happens after null checks, so the - // exception paths do not take memory state from the memory barrier, - // so there's no problems making a strong assert about mixing users - // of safe & unsafe memory. 
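// The code being removed around here emitted the leading and trailing membars for
// Release/Volatile accesses directly in the intrinsic; with the patch only the
// C2_MO_* decorator is recorded and the fences become the barrier-set backend's
// responsibility. A rough C++11 analogy of the ordering the removed barriers
// enforced for a volatile Unsafe store (analogy only, not a description of the IR):
#include <atomic>

static void volatile_store_shape(std::atomic<int>& field, int v) {
  std::atomic_thread_fence(std::memory_order_release);  // leading MemBarRelease
  field.store(v, std::memory_order_relaxed);            // the plain store itself
  std::atomic_thread_fence(std::memory_order_seq_cst);  // trailing MemBarVolatile
}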
- if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - if (!is_store) { Node* p = NULL; // Try to constant fold a load from a constant field @@ -2501,106 +2365,49 @@ // final or stable field p = make_constant_from_field(field, heap_base_oop); } - if (p == NULL) { - // To be valid, unsafe loads may depend on other conditions than - // the one that guards them: pin the Load node - p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, requires_atomic_access, unaligned, mismatched); - // load value - switch (type) { - case T_BOOLEAN: - { - // Normalize the value returned by getBoolean in the following cases - if (mismatched || - heap_base_oop == top() || // - heap_base_oop is NULL or - (can_access_non_heap && alias_type->field() == NULL) // - heap_base_oop is potentially NULL - // and the unsafe access is made to large offset - // (i.e., larger than the maximum offset necessary for any - // field access) + + if (p == NULL) { // Could not constant fold the load + p = access_load_at(heap_base_oop, adr, adr_type, value_type, type, decorators); + // Normalize the value returned by getBoolean in the following cases + if (type == T_BOOLEAN && + (mismatched || + heap_base_oop == top() || // - heap_base_oop is NULL or + (can_access_non_heap && field == NULL)) // - heap_base_oop is potentially NULL + // and the unsafe access is made to large offset + // (i.e., larger than the maximum offset necessary for any + // field access) ) { IdealKit ideal = IdealKit(this); #define __ ideal. - IdealVariable normalized_result(ideal); - __ declarations_done(); - __ set(normalized_result, p); - __ if_then(p, BoolTest::ne, ideal.ConI(0)); - __ set(normalized_result, ideal.ConI(1)); - ideal.end_if(); - final_sync(ideal); - p = __ value(normalized_result); + IdealVariable normalized_result(ideal); + __ declarations_done(); + __ set(normalized_result, p); + __ if_then(p, BoolTest::ne, ideal.ConI(0)); + __ set(normalized_result, ideal.ConI(1)); + ideal.end_if(); + final_sync(ideal); + p = __ value(normalized_result); #undef __ - } - } - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_LONG: - case T_FLOAT: - case T_DOUBLE: - break; - case T_OBJECT: - if (need_read_barrier) { - // We do not require a mem bar inside pre_barrier if need_mem_bar - // is set: the barriers would be emitted by us. - insert_pre_barrier(heap_base_oop, offset, p, !need_mem_bar); - } - break; - case T_ADDRESS: - // Cast to an int type. - p = _gvn.transform(new CastP2XNode(NULL, p)); - p = ConvX2UL(p); - break; - default: - fatal("unexpected type %d: %s", type, type2name(type)); - break; } } + if (type == T_ADDRESS) { + p = gvn().transform(new CastP2XNode(NULL, p)); + p = ConvX2UL(p); + } // The load node has the control of the preceding MemBarCPUOrder. All // following nodes will have the control of the MemBarCPUOrder inserted at // the end of this method. So, pushing the load onto the stack at a later // point is fine. set_result(p); } else { - // place effect of store into memory - switch (type) { - case T_DOUBLE: - val = dstore_rounding(val); - break; - case T_ADDRESS: + if (bt == T_ADDRESS) { // Repackage the long as a pointer. 
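// T_ADDRESS values travel through the Unsafe API as Java longs, so the load path
// above casts the pointer to an unsigned long (CastP2X + ConvX2UL) and the store
// path below repackages the long as a pointer (ConvL2X + CastX2P). The same
// round-trip in plain C++ (sketch only):
#include <cstdint>

static uint64_t address_to_jlong(void* p) {
  return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p));  // CastP2X + ConvX2UL
}
static void* jlong_to_address(uint64_t bits) {
  return reinterpret_cast<void*>(static_cast<uintptr_t>(bits));  // ConvL2X + CastX2P
}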
val = ConvL2X(val); - val = _gvn.transform(new CastX2PNode(val)); - break; + val = gvn().transform(new CastX2PNode(val)); } - - if (type == T_OBJECT) { - store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); - } else { - store_to_memory(control(), adr, val, type, adr_type, mo, requires_atomic_access, unaligned, mismatched); - } - } - - switch(kind) { - case Relaxed: - case Opaque: - case Release: - break; - case Acquire: - case Volatile: - if (!is_store) { - insert_mem_bar(Op_MemBarAcquire); - } else { - if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } - } - break; - default: - ShouldNotReachHere(); + access_store_at(control(), heap_base_oop, adr, adr_type, val, value_type, type, decorators); } - if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - return true; } @@ -2662,6 +2469,18 @@ if (callee()->is_static()) return false; // caller must have the capability! + C2DecoratorSet decorators = C2_ACCESS_ON_HEAP; + + switch(access_kind) { + case Opaque: + case Relaxed: decorators |= C2_MO_RELAXED; break; + case Acquire: decorators |= C2_MO_ACQUIRE; break; + case Release: decorators |= C2_MO_RELEASE; break; + case Volatile: decorators |= C2_MO_VOLATILE; break; + default: + ShouldNotReachHere(); + } + #ifndef PRODUCT BasicType rtype; { @@ -2793,127 +2612,7 @@ int alias_idx = C->get_alias_index(adr_type); - // Memory-model-wise, a LoadStore acts like a little synchronized - // block, so needs barriers on each side. These don't translate - // into actual barriers on most machines, but we still need rest of - // compiler to respect ordering. - - switch (access_kind) { - case Relaxed: - case Acquire: - break; - case Release: - insert_mem_bar(Op_MemBarRelease); - break; - case Volatile: - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } else { - insert_mem_bar(Op_MemBarRelease); - } - break; - default: - ShouldNotReachHere(); - } - insert_mem_bar(Op_MemBarCPUOrder); - - // Figure out the memory ordering. - MemNode::MemOrd mo = access_kind_to_memord(access_kind); - - // 4984716: MemBars must be inserted before this - // memory node in order to avoid a false - // dependency which will confuse the scheduler. - Node *mem = memory(alias_idx); - - // For now, we handle only those cases that actually exist: ints, - // longs, and Object. Adding others should be straightforward. 
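// The removed switch below hand-built one IR node class per (value type, kind)
// combination; the replacement funnels every flavor through the access_* entry
// points plus decorators. As a reminder of what the four LoadStore kinds mean, in
// ordinary C++ atomics (analogy only, shown for int):
#include <atomic>

static bool ls_cmp_swap(std::atomic<int>& a, int expected, int desired) {
  return a.compare_exchange_strong(expected, desired);  // boolean success result
}
static int ls_cmp_exchange(std::atomic<int>& a, int expected, int desired) {
  a.compare_exchange_strong(expected, desired);
  return expected;                                      // value observed at the address
}
static int ls_get_set(std::atomic<int>& a, int desired) {
  return a.exchange(desired);                           // unconditional swap
}
static int ls_get_add(std::atomic<int>& a, int delta) {
  return a.fetch_add(delta);                            // fetch-and-add
}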
- Node* load_store = NULL; - switch(type) { - case T_BYTE: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddBNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetBNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapBNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapBNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeBNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_SHORT: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddSNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetSNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapSNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapSNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeSNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_INT: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapINode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeINode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_LONG: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapLNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeLNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_OBJECT: + if (type == T_OBJECT) { // Transformation of a value which could be NULL pointer (CastPP #NULL) // could be delayed during Parse (for example, in adjust_map_after_if()). // Execute transformation here to avoid barrier generation in such case. @@ -2921,157 +2620,37 @@ newval = _gvn.makecon(TypePtr::NULL_PTR); // Reference stores need a store barrier. - switch(kind) { - case LS_get_set: { - // If pre-barrier must execute before the oop store, old value will require do_load here. 
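// The block being removed here open-coded G1's SATB pre-barrier (publish the value
// about to be overwritten) and the card-table post-barrier (dirty the card of the
// holder) around every reference LoadStore. Conceptually, for a plain oop store
// (stand-in hooks below, not the real mark queues or card table):
typedef void* oop_t;

static void satb_enqueue_sketch(oop_t /*old_value*/) { /* SATB mark queue stand-in */ }
static void dirty_card_sketch(oop_t /*holder*/)      { /* card table stand-in */ }

static void barriered_oop_store(oop_t holder, oop_t* field, oop_t new_value) {
  oop_t old_value = *field;
  if (old_value != nullptr) {
    satb_enqueue_sketch(old_value);   // pre-barrier: old value stays reachable for SATB
  }
  *field = new_value;
  dirty_card_sketch(holder);          // post-barrier: remember holder for card scanning
}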
- if (!can_move_pre_barrier()) { - pre_barrier(true /* do_load*/, - control(), base, adr, alias_idx, newval, value_type->make_oopptr(), - NULL /* pre_val*/, - T_OBJECT); - } // Else move pre_barrier to use load_store value, see below. - break; - } - case LS_cmp_swap_weak: - case LS_cmp_swap: - case LS_cmp_exchange: { - // Same as for newval above: - if (_gvn.type(oldval) == TypePtr::NULL_PTR) { - oldval = _gvn.makecon(TypePtr::NULL_PTR); - } - // The only known value which might get overwritten is oldval. - pre_barrier(false /* do_load */, - control(), NULL, NULL, max_juint, NULL, NULL, - oldval /* pre_val */, - T_OBJECT); - break; - } - default: - ShouldNotReachHere(); - } - -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = _gvn.transform(new EncodePNode(newval, newval->bottom_type()->make_narrowoop())); - - switch(kind) { - case LS_get_set: - load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop())); - break; - case LS_cmp_swap_weak: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new WeakCompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo)); - break; - } - case LS_cmp_swap: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo)); - break; - } - case LS_cmp_exchange: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new CompareAndExchangeNNode(control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); - break; - } - default: - ShouldNotReachHere(); - } - } else -#endif - switch (kind) { - case LS_get_set: - load_store = _gvn.transform(new GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr())); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapPNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangePNode(control(), mem, adr, newval, oldval, adr_type, value_type->is_oopptr(), mo)); - break; - default: - ShouldNotReachHere(); + if (kind == LS_cmp_exchange && _gvn.type(oldval) == TypePtr::NULL_PTR) { + oldval = _gvn.makecon(TypePtr::NULL_PTR); } - - // Emit the post barrier only when the actual store happened. This makes sense - // to check only for LS_cmp_* that can fail to set the value. - // LS_cmp_exchange does not produce any branches by default, so there is no - // boolean result to piggyback on. TODO: When we merge CompareAndSwap with - // CompareAndExchange and move branches here, it would make sense to conditionalize - // post_barriers for LS_cmp_exchange as well. - // - // CAS success path is marked more likely since we anticipate this is a performance - // critical path, while CAS failure path can use the penalty for going through unlikely - // path as backoff. Which is still better than doing a store barrier there. 
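// As the removed comment says, the card mark was emitted only on the CAS success
// path: a predicted-taken branch on the CAS result is cheaper than an unconditional
// post-barrier on the failure path. The shape of that removed IdealKit graph, in
// plain C++ (card_mark_sketch is a stand-in, not HotSpot code):
#include <atomic>

static void card_mark_sketch() { /* post-barrier stand-in */ }

static bool cas_with_conditional_card_mark(std::atomic<void*>& field,
                                           void* expected, void* desired) {
  bool success = field.compare_exchange_strong(expected, desired);
  if (success) {            // PROB_STATIC_FREQUENT: success assumed likely
    card_mark_sketch();     // only mark the card when a store actually happened
  }
  return success;
}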
- switch (kind) { - case LS_get_set: - case LS_cmp_exchange: { - post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); - break; - } - case LS_cmp_swap_weak: - case LS_cmp_swap: { - IdealKit ideal(this); - ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); { - sync_kit(ideal); - post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); - ideal.sync_kit(this); - } ideal.end_if(); - final_sync(ideal); - break; - } - default: - ShouldNotReachHere(); - } - break; - default: - fatal("unexpected type %d: %s", type, type2name(type)); - break; } - // SCMemProjNodes represent the memory state of a LoadStore. Their - // main role is to prevent LoadStore nodes from being optimized away - // when their results aren't used. - Node* proj = _gvn.transform(new SCMemProjNode(load_store)); - set_memory(proj, alias_idx); - - if (type == T_OBJECT && (kind == LS_get_set || kind == LS_cmp_exchange)) { -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - load_store = _gvn.transform(new DecodeNNode(load_store, load_store->get_ptr_type())); + Node* result = NULL; + switch (kind) { + case LS_cmp_exchange: { + result = access_cas_val_at(control(), base, adr, adr_type, alias_idx, oldval, newval, value_type, type, decorators); + break; } -#endif - if (can_move_pre_barrier() && kind == LS_get_set) { - // Don't need to load pre_val. The old value is returned by load_store. - // The pre_barrier can execute after the xchg as long as no safepoint - // gets inserted between them. - pre_barrier(false /* do_load */, - control(), NULL, NULL, max_juint, NULL, NULL, - load_store /* pre_val */, - T_OBJECT); + case LS_cmp_swap_weak: + decorators |= C2_WEAK_CAS; + case LS_cmp_swap: { + result = access_cas_bool_at(control(), base, adr, adr_type, alias_idx, oldval, newval, value_type, type, decorators); + break; } - } - - // Add the trailing membar surrounding the access - insert_mem_bar(Op_MemBarCPUOrder); - - switch (access_kind) { - case Relaxed: - case Release: - break; // do nothing - case Acquire: - case Volatile: - insert_mem_bar(Op_MemBarAcquire); - // !support_IRIW_for_not_multiple_copy_atomic_cpu handled in platform code + case LS_get_set: { + result = access_swap_at(control(), base, adr, adr_type, alias_idx, newval, value_type, type, decorators); + break; + } + case LS_get_add: { + result = access_fetch_and_add_at(control(), base, adr, adr_type, alias_idx, newval, value_type, type, decorators); break; + } default: ShouldNotReachHere(); } - assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); - set_result(load_store); + assert(type2size[result->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); + set_result(result); return true; } @@ -3392,8 +2971,10 @@ //---------------------------load_mirror_from_klass---------------------------- // Given a klass oop, load its java mirror (a java.lang.Class oop). 
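// C2_WEAK_CAS above tags LS_cmp_swap_weak (note the deliberate fall-through into
// LS_cmp_swap): the backend may emit a compare-and-swap that fails spuriously, so
// callers such as weakCompareAndSet sit in retry loops. The distinction in C++
// terms (analogy only):
#include <atomic>

static bool strong_cas_once(std::atomic<int>& a, int expected, int desired) {
  return a.compare_exchange_strong(expected, desired);   // no spurious failure
}
static bool weak_cas_with_retry(std::atomic<int>& a, int expected, int desired) {
  int e = expected;
  while (!a.compare_exchange_weak(e, desired)) {          // may fail spuriously
    if (e != expected) return false;                      // genuine value mismatch
    // spurious failure: e still equals expected, retry
  }
  return true;
}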
Node* LibraryCallKit::load_mirror_from_klass(Node* klass) { + C2DecoratorSet decorators = C2_ACCESS_ON_HEAP | C2_ACCESS_FREE_CONTROL; Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset())); - return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT, MemNode::unordered); + return access_load_at(klass, p, p->bottom_type()->is_ptr(), + TypeInstPtr::MIRROR, T_OBJECT, decorators); } //-----------------------load_klass_from_mirror_common------------------------- @@ -4539,7 +4120,7 @@ //------------------------clone_coping----------------------------------- // Helper function for inline_native_clone. -void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark) { +void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array) { assert(obj_size != NULL, ""); Node* raw_obj = alloc_obj->in(1); assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), ""); @@ -4559,66 +4140,9 @@ // Copy the fastest available way. // TODO: generate fields copies for small objects instead. - Node* src = obj; - Node* dest = alloc_obj; Node* size = _gvn.transform(obj_size); - // Exclude the header but include array length to copy by 8 bytes words. - // Can't use base_offset_in_bytes(bt) since basic type is unknown. - int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() : - instanceOopDesc::base_offset_in_bytes(); - // base_off: - // 8 - 32-bit VM - // 12 - 64-bit VM, compressed klass - // 16 - 64-bit VM, normal klass - if (base_off % BytesPerLong != 0) { - assert(UseCompressedClassPointers, ""); - if (is_array) { - // Exclude length to copy by 8 bytes words. - base_off += sizeof(int); - } else { - // Include klass to copy by 8 bytes words. - base_off = instanceOopDesc::klass_offset_in_bytes(); - } - assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment"); - } - src = basic_plus_adr(src, base_off); - dest = basic_plus_adr(dest, base_off); - - // Compute the length also, if needed: - Node* countx = size; - countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off))); - countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) )); - - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - - ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false, false); - ac->set_clonebasic(); - Node* n = _gvn.transform(ac); - if (n == ac) { - set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type); - } else { - set_all_memory(n); - } - - // If necessary, emit some card marks afterwards. (Non-arrays only.) - if (card_mark) { - assert(!is_array, ""); - // Put in store barrier for any and all oops we are sticking - // into this object. (We could avoid this if we could prove - // that the object type contains no oop fields at all.) - Node* no_particular_value = NULL; - Node* no_particular_field = NULL; - int raw_adr_idx = Compile::AliasIdxRaw; - post_barrier(control(), - memory(raw_adr_type), - alloc_obj, - no_particular_field, - raw_adr_idx, - no_particular_value, - T_OBJECT, - false); - } + access_clone(control(), obj, alloc_obj, size, is_array); // Do not let reads from the cloned object float above the arraycopy. 
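// The block removed above computed how many 8-byte words the clone has to copy:
// start past the header (with compressed class pointers that means either skipping
// the 4-byte array length or re-including the klass word to stay 8-byte aligned)
// and shift the remaining byte count by LogBytesPerLong. The arithmetic as a
// standalone sketch with illustrative 64-bit offsets:
#include <cstddef>

static size_t clone_copy_word_count(size_t obj_size_bytes, size_t base_off) {
  // base_off per the removed comment: 8 (32-bit VM), 12 (64-bit, compressed klass),
  // 16 (64-bit, normal klass)
  if (base_off % 8 != 0) {
    // The removed code re-aligned here: arrays also skip the 4-byte length word,
    // instances back up to the klass word instead; array flavor shown for brevity.
    base_off += sizeof(int);
  }
  return (obj_size_bytes - base_off) / 8;  // countx = (size - base_off) >> LogBytesPerLong
}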
if (alloc != NULL) { @@ -4708,9 +4232,6 @@ PhiNode* result_mem = new PhiNode(result_reg, Type::MEMORY, TypePtr::BOTTOM); record_for_igvn(result_reg); - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - int raw_adr_idx = Compile::AliasIdxRaw; - Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); if (array_ctl != NULL) { // It's an array. @@ -4720,9 +4241,10 @@ Node* obj_size = NULL; Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push - if (!use_ReduceInitialCardMarks()) { + C2BarrierSetCodeGen* code_gen = Universe::heap()->barrier_set()->c2_code_gen(); + if (code_gen->array_copy_requires_gc_barriers(T_OBJECT)) { // If it is an oop array, it requires very special treatment, - // because card marking is required on each card of the array. + // because gc barriers are required when accessing the array. Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL); if (is_obja != NULL) { PreserveJVMState pjvms2(this); @@ -4741,7 +4263,7 @@ result_mem ->set_req(_objArray_path, reset_memory()); } } - // Otherwise, there are no card marks to worry about. + // Otherwise, there are no barriers to worry about. // (We can dispense with card marks if we know the allocation // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks // causes the non-eden paths to take compensating steps to @@ -4750,7 +4272,7 @@ // the object.) if (!stopped()) { - copy_to_clone(obj, alloc_obj, obj_size, true, false); + copy_to_clone(obj, alloc_obj, obj_size, true); // Present the results of the copy. result_reg->init_req(_array_path, control()); @@ -4796,7 +4318,7 @@ // exception state between multiple Object.clone versions (reexecute=true vs reexecute=false). Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size, /*deoptimize_on_exception=*/true); - copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks()); + copy_to_clone(obj, alloc_obj, obj_size, false); // Present the results of the slow call. result_reg->init_req(_instance_path, control()); @@ -5995,21 +5517,23 @@ Node* reference_obj = null_check_receiver(); if (stopped()) return true; + const TypeInstPtr* tinst = _gvn.type(reference_obj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + ciInstanceKlass* referenceKlass = tinst->klass()->as_instance_klass(); + ciField* field = referenceKlass->get_field_by_name(ciSymbol::make("referent"), + ciSymbol::make("Ljava/lang/Object;"), + false); + assert (field != NULL, "undefined field"); + Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); + const TypePtr* adr_type = C->alias_type(field)->adr_type(); ciInstanceKlass* klass = env()->Object_klass(); const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass); - Node* no_ctrl = NULL; - Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, MemNode::unordered); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(false /* do_load */, - control(), - NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, - result /* pre_val */, - T_OBJECT); - + C2DecoratorSet decorators = C2_ACCESS_ON_WEAK | C2_ACCESS_ON_HEAP | C2_ACCESS_FREE_CONTROL; + Node* result = access_load_at(reference_obj, adr, adr_type, object_type, T_OBJECT, decorators); // Add memory barrier to prevent commoning reads from this field // across safepoint since GC can change its value. 
insert_mem_bar(Op_MemBarCPUOrder); @@ -6062,20 +5586,13 @@ type = Type::get_const_basic_type(bt); } - if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_vol) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier - } - // Build the load. - MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered; - Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol); - // If reference is volatile, prevent following memory ops from - // floating up past the volatile read. Also prevents commoning - // another volatile read. + C2DecoratorSet decorators = C2_ACCESS_ON_HEAP | C2_ACCESS_FREE_CONTROL; + if (is_vol) { - // Memory barrier includes bogus read of value to force load BEFORE membar - insert_mem_bar(Op_MemBarAcquire, loadedField); + decorators |= C2_MO_VOLATILE; } - return loadedField; + + return access_load_at(fromObj, adr, adr_type, type, bt, decorators); } Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,