hotspot Cdiff src/share/vm/runtime/orderAccess.hpp

src/share/vm/runtime/orderAccess.hpp


*** 27,41 ****
  
  #include "memory/allocation.hpp"
  
  //                Memory Access Ordering Model
  //
! // This interface is based on the JSR-133 Cookbook for Compiler Writers
! // and on the IA64 memory model.  It is the dynamic equivalent of the
! // C/C++ volatile specifier.  I.e., volatility restricts compile-time
! // memory access reordering in a way similar to what we want to occur
! // at runtime.
  //
  // In the following, the terms 'previous', 'subsequent', 'before',
  // 'after', 'preceding' and 'succeeding' refer to program order.  The
  // terms 'down' and 'below' refer to forward load or store motion
  // relative to program order, while 'up' and 'above' refer to backward
--- 27,37 ----
  
  #include "memory/allocation.hpp"
  
  //                Memory Access Ordering Model
  //
! // This interface is based on the JSR-133 Cookbook for Compiler Writers.
  //
  // In the following, the terms 'previous', 'subsequent', 'before',
  // 'after', 'preceding' and 'succeeding' refer to program order.  The
  // terms 'down' and 'below' refer to forward load or store motion
  // relative to program order, while 'up' and 'above' refer to backward
*** 67,184 ****
  //
  // Ensures that Store1 completes before Load2 and any subsequent load
  // operations.  Stores before Store1 may *not* float below Load2 and any
  // subsequent load operations.
  //
  //
! // We define two further operations, 'release' and 'acquire'.  They are
! // mirror images of each other.
  //
- // Execution by a processor of release makes the effect of all memory
- // accesses issued by it previous to the release visible to all
- // processors *before* the release completes.  The effect of subsequent
- // memory accesses issued by it *may* be made visible *before* the
- // release.  I.e., subsequent memory accesses may float above the
- // release, but prior ones may not float below it.
- //
- // Execution by a processor of acquire makes the effect of all memory
- // accesses issued by it subsequent to the acquire visible to all
- // processors *after* the acquire completes.  The effect of prior memory
- // accesses issued by it *may* be made visible *after* the acquire.
- // I.e., prior memory accesses may float below the acquire, but
- // subsequent ones may not float above it.
- //
- // Finally, we define a 'fence' operation, which conceptually is a
- // release combined with an acquire.  In the real world these operations
- // require one or more machine instructions which can float above and
- // below the release or acquire, so we usually can't just issue the
- // release-acquire back-to-back.  All machines we know of implement some
- // sort of memory fence instruction.
- //
- //
- // The standalone implementations of release and acquire need an associated
- // dummy volatile store or load respectively.  To avoid redundant operations,
- // we can define the composite operators: 'release_store', 'store_fence' and
- // 'load_acquire'.  Here's a summary of the machine instructions corresponding
- // to each operation.
- //
- //               sparc RMO             ia64             x86
- // ---------------------------------------------------------------------
- // fence         membar #LoadStore |   mf               lock addl 0,(sp)
- //                      #StoreStore |
- //                      #LoadLoad |
- //                      #StoreLoad
- //
- // release       membar #LoadStore |   st.rel [sp]=r0   movl $0,<dummy>
- //                      #StoreStore
- //               st %g0,[]
- //
- // acquire       ld [%sp],%g0          ld.acq <r>=[sp]  movl (sp),<r>
- //               membar #LoadLoad |
- //                      #LoadStore
- //
- // release_store membar #LoadStore |   st.rel           <store>
- //                      #StoreStore
- //               st
- //
- // store_fence   st                    st               lock xchg
- //               fence                 mf
- //
- // load_acquire  ld                    ld.acq           <load>
- //               membar #LoadLoad |
- //                      #LoadStore
- //
- // Using only release_store and load_acquire, we can implement the
- // following ordered sequences.
- //
- // 1. load, load   == load_acquire,  load
- //                 or load_acquire,  load_acquire
- // 2. load, store  == load,          release_store
- //                 or load_acquire,  store
- //                 or load_acquire,  release_store
- // 3. store, store == store,         release_store
- //                 or release_store, release_store
  //
! // These require no membar instructions for sparc-TSO and no extra
! // instructions for ia64.
  //
! // Ordering a load relative to preceding stores requires a store_fence,
  // which implies a membar #StoreLoad between the store and load under
! // sparc-TSO.  A fence is required by ia64.  On x86, we use locked xchg.
! //
! // 4. store, load  == store_fence, load
! //
! // Use store_fence to make sure all stores done in an 'interesting'
! // region are made visible prior to both subsequent loads and stores.
  //
  // Conventional usage is to issue a load_acquire for ordered loads.  Use
  // release_store for ordered stores when you care only that prior stores
  // are visible before the release_store, but don't care exactly when the
  // store associated with the release_store becomes visible.  Use
  // release_store_fence to update values like the thread state, where we
  // don't want the current thread to continue until all our prior memory
  // accesses (including the new thread state) are visible to other threads.
! //
! //
! //                C++ Volatility
! //
! // C++ guarantees ordering at operations termed 'sequence points' (defined
! // to be volatile accesses and calls to library I/O functions).  'Side
! // effects' (defined as volatile accesses, calls to library I/O functions
! // and object modification) previous to a sequence point must be visible
! // at that sequence point.  See the C++ standard, section 1.9, titled
! // "Program Execution".  This means that all barrier implementations,
! // including standalone loadload, storestore, loadstore, storeload, acquire
! // and release must include a sequence point, usually via a volatile memory
! // access.  Other ways to guarantee a sequence point are, e.g., use of
! // indirect calls and linux's __asm__ volatile.
! // Note: as of 6973570, we have replaced the originally static "dummy" field
! // (see above) by a volatile store to the stack. All of the versions of the
! // compilers that we currently use (SunStudio, gcc and VC++) respect the
! // semantics of volatile here. If you build HotSpot using other
! // compilers, you may need to verify that no compiler reordering occurs
! // across the sequence point represented by the volatile access.
  //
  //
  //                os::is_MP Considered Redundant
  //
  // Callers of this interface do not need to test os::is_MP() before
--- 63,163 ----
  //
  // Ensures that Store1 completes before Load2 and any subsequent load
  // operations.  Stores before Store1 may *not* float below Load2 and any
  // subsequent load operations.
  //
+ // We define two further barriers: acquire and release.
  //
! // Conceptually, acquire/release semantics form unidirectional and
! // asynchronous barriers w.r.t. a synchronizing load(X) and store(X) pair.
! // They should always be used in pairs to publish (release store) and
! // access (load acquire) some implicitly understood shared data between
! // threads in a relatively cheap fashion not requiring storeload. If not
! // used in such a pair, it is advised to use a membar instead:
! // acquire/release only make sense as pairs.
! //
! // T1: access_shared_data
! // T1: ]release
! // T1: (...)
! // T1: store(X)
! //
! // T2: load(X)
! // T2: (...)
! // T2: acquire[
! // T2: access_shared_data
! //
! // It is guaranteed that if T2: load(X) synchronizes with (observes the
! // value written by) T1: store(X), then the memory accesses before the T1:
! // ]release happen before the memory accesses after the T2: acquire[.
! //
! // Total Store Order (TSO) machines can be seen as machines issuing a
! // release store for each store and a load acquire for each load. Therefore
! // there is an inherent resemblance between TSO and acquire/release
! // semantics. TSO can be seen as an abstract machine where loads are
! // executed immediately when encountered (hence loadload reordering not
! // happening) but stores are enqueued in a FIFO queue
! // for asynchronous serialization (neither storestore nor loadstore
! // reordering happening). The only reordering happening is storeload due to
! // the queue asynchronously serializing stores (yet in order).
! //
! // Acquire/release semantics essentially exploits this asynchronicity: when
! // the load(X) acquire[ observes the store of ]release store(X), the
! // accesses before the release must have happened before the accesses after
! // acquire.
! //
! // The API offers both stand-alone acquire() and release() as well as joined
! // load_acquire() and release_store(). It is guaranteed that these are
! // semantically equivalent w.r.t. the defined model. However, since
! // stand-alone acquire()/release() do not know which previous
! // load/subsequent store is considered the synchronizing load/store, they
! // may be more conservative in implementations. We advise using the joined
! // variants whenever possible.
! //
! // Finally, we define a "fence" operation, as a bidirectional barrier.
! // It guarantees that any memory access preceding the fence is not
! // reordered w.r.t. any memory accesses subsequent to the fence in program
! // order. This may be used to prevent sequences of loads from floating up
! // above sequences of stores.
! //
! // The following table shows the implementations on some architectures:
! //
! //                       Constraint     x86          sparc              ppc
! // ---------------------------------------------------------------------------
! // fence                 LoadStore  |   lock         membar #StoreLoad  sync
! //                       StoreStore |   addl 0,(sp)
! //                       LoadLoad   |
! //                       StoreLoad
! //
! // release               LoadStore  |                                   lwsync
! //                       StoreStore
! //
! // acquire               LoadLoad   |                                   lwsync
! //                       LoadStore
! //
! // release_store                        <store>      <store>            lwsync
! //                                                                      <store>
! //
! // release_store_fence                  xchg         <store>            lwsync
! //                                                   membar #StoreLoad  <store>
! //                                                                      sync
  //
  //
! // load_acquire                         <load>       <load>             <load>
! //                                                                      lwsync
  //
! // Ordering a load relative to preceding stores requires a StoreLoad,
  // which implies a membar #StoreLoad between the store and load under
! // sparc-TSO. On x86, we use explicitly locked add.
  //
  // Conventional usage is to issue a load_acquire for ordered loads.  Use
  // release_store for ordered stores when you care only that prior stores
  // are visible before the release_store, but don't care exactly when the
  // store associated with the release_store becomes visible.  Use
  // release_store_fence to update values like the thread state, where we
  // don't want the current thread to continue until all our prior memory
  // accesses (including the new thread state) are visible to other threads.
! // This is equivalent to the volatile semantics of the Java Memory Model.
  //
  //
  //                os::is_MP Considered Redundant
  //
  // Callers of this interface do not need to test os::is_MP() before
*** 238,249 ****
--- 217,254 ----
  // and friends' constructors do a fence, a lock and an acquire *in that
  // order*.  And that their destructors do a release and unlock, in *that*
  // order.  If their implementations change such that these assumptions
  // are violated, a whole lot of code will break.
  
+ enum ScopedFenceType {
+     X_ACQUIRE
+   , RELEASE_X
+   , RELEASE_X_FENCE
+ };
+ 
+ template <ScopedFenceType T>
+ class ScopedFenceGeneral: public StackObj {
+  public:
+   void prefix() {}
+   void postfix() {}
+ };
+ 
+ template <ScopedFenceType T>
+ class ScopedFence : public ScopedFenceGeneral<T> {
+   void *const _field;
+  public:
+   ScopedFence(void *const field) : _field(field) { prefix(); }
+   ~ScopedFence() { postfix(); }
+   void prefix() { ScopedFenceGeneral<T>::prefix(); }
+   void postfix() { ScopedFenceGeneral<T>::postfix(); }
+ };
+ 
+ // This class implements some fences for different platforms and specializes
+ // the methods of its superclass using template specialization for improved performance.
  class OrderAccess : AllStatic {
   public:
+   // barriers
    static void     loadload();
    static void     storestore();
    static void     loadstore();
    static void     storeload();
  
*** 278,301 ****
    static void     release_store(volatile jdouble* p, jdouble v);
  
    static void     release_store_ptr(volatile intptr_t* p, intptr_t v);
    static void     release_store_ptr(volatile void*     p, void*    v);
  
-   static void     store_fence(jbyte*   p, jbyte   v);
-   static void     store_fence(jshort*  p, jshort  v);
-   static void     store_fence(jint*    p, jint    v);
-   static void     store_fence(jlong*   p, jlong   v);
-   static void     store_fence(jubyte*  p, jubyte  v);
-   static void     store_fence(jushort* p, jushort v);
-   static void     store_fence(juint*   p, juint   v);
-   static void     store_fence(julong*  p, julong  v);
-   static void     store_fence(jfloat*  p, jfloat  v);
-   static void     store_fence(jdouble* p, jdouble v);
- 
-   static void     store_ptr_fence(intptr_t* p, intptr_t v);
-   static void     store_ptr_fence(void**    p, void*    v);
- 
    static void     release_store_fence(volatile jbyte*   p, jbyte   v);
    static void     release_store_fence(volatile jshort*  p, jshort  v);
    static void     release_store_fence(volatile jint*    p, jint    v);
    static void     release_store_fence(volatile jlong*   p, jlong   v);
    static void     release_store_fence(volatile jubyte*  p, jubyte  v);
--- 283,292 ----
*** 311,318 ****
--- 302,350 ----
   private:
    // This is a helper that invokes the StubRoutines::fence_entry()
    // routine if it exists, It should only be used by platforms that
    // don't have another way to do the inline assembly.
    static void StubRoutines_fence();
+ 
+   // Give platforms a variation point to specialize.
+   template<typename T> static T    specialized_load_acquire       (volatile T* p     );
+   template<typename T> static void specialized_release_store      (volatile T* p, T v);
+   template<typename T> static void specialized_release_store_fence(volatile T* p, T v);
+ 
+   template<typename FieldType, ScopedFenceType FenceType>
+   static void ordered_store(volatile FieldType* p, FieldType v);
+ 
+   template<typename FieldType, ScopedFenceType FenceType>
+   static FieldType ordered_load(volatile FieldType* p);
+ 
+   static void    store(volatile jbyte*   p, jbyte   v);
+   static void    store(volatile jshort*  p, jshort  v);
+   static void    store(volatile jint*    p, jint    v);
+   static void    store(volatile jlong*   p, jlong   v);
+   static void    store(volatile jdouble* p, jdouble v);
+   static void    store(volatile jfloat*  p, jfloat  v);
+ 
+   static jbyte   load (volatile jbyte*   p);
+   static jshort  load (volatile jshort*  p);
+   static jint    load (volatile jint*    p);
+   static jlong   load (volatile jlong*   p);
+   static jdouble load (volatile jdouble* p);
+   static jfloat  load (volatile jfloat*  p);
+ 
+   // The following store_fence methods are deprecated and will be removed
+   // when all repos conform to the new generalized OrderAccess.
+   static void    store_fence(jbyte*   p, jbyte   v);
+   static void    store_fence(jshort*  p, jshort  v);
+   static void    store_fence(jint*    p, jint    v);
+   static void    store_fence(jlong*   p, jlong   v);
+   static void    store_fence(jubyte*  p, jubyte  v);
+   static void    store_fence(jushort* p, jushort v);
+   static void    store_fence(juint*   p, juint   v);
+   static void    store_fence(julong*  p, julong  v);
+   static void    store_fence(jfloat*  p, jfloat  v);
+   static void    store_fence(jdouble* p, jdouble v);
+ 
+   static void    store_ptr_fence(intptr_t* p, intptr_t v);
+   static void    store_ptr_fence(void**    p, void*    v);
  };
  
  #endif // SHARE_VM_RUNTIME_ORDERACCESS_HPP
< prev index next >