# HG changeset patch
# User goetz
# Date 1385976374 -3600
# Node ID 86af4160ce9fdae2bba8649d09c86835ebcd84b3
# Parent  41b780b43b7485b23d8ea7e906f412955935cb87
8029396: PPC64 (part 212): Several memory ordering fixes in C-code.

diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -997,6 +997,13 @@
   if (FreeChunk::indicatesFreeChunk(p)) {
     volatile FreeChunk* fc = (volatile FreeChunk*)p;
     size_t res = fc->size();
+
+    // Bugfix for systems with weak memory model (PPC64/IA64). The
+    // block's free bit was set and we have read the size of the
+    // block. Acquire and check the free bit again. If the block is
+    // still free, the read size is correct.
+    OrderAccess::acquire();
+
     // If the object is still a free chunk, return the size, else it
     // has been allocated so try again.
     if (FreeChunk::indicatesFreeChunk(p)) {
@@ -1010,6 +1017,12 @@
     assert(k->is_klass(), "Should really be klass oop.");
     oop o = (oop)p;
     assert(o->is_oop(true /* ignore mark word */), "Should be an oop.");
+
+    // Bugfix for systems with weak memory model (PPC64/IA64).
+    // The object o may be an array. Acquire to make sure that the array
+    // size (third word) is consistent.
+    OrderAccess::acquire();
+
     size_t res = o->size_given_klass(k);
     res = adjustObjectSize(res);
     assert(res != 0, "Block size should not be 0");
@@ -1040,6 +1053,13 @@
   if (FreeChunk::indicatesFreeChunk(p)) {
     volatile FreeChunk* fc = (volatile FreeChunk*)p;
     size_t res = fc->size();
+
+    // Bugfix for systems with weak memory model (PPC64/IA64). The
+    // free bit of the block was set and we have read the size of
+    // the block. Acquire and check the free bit again. If the
+    // block is still free, the read size is correct.
+    OrderAccess::acquire();
+
     if (FreeChunk::indicatesFreeChunk(p)) {
       assert(res != 0, "Block size should not be 0");
       assert(loops == 0, "Should be 0");
@@ -1055,6 +1075,12 @@
     assert(k->is_klass(), "Should really be klass oop.");
     oop o = (oop)p;
     assert(o->is_oop(), "Should be an oop");
+
+    // Bugfix for systems with weak memory model (PPC64/IA64).
+    // The object o may be an array. Acquire to make sure that the array
+    // size (third word) is consistent.
+    OrderAccess::acquire();
+
     size_t res = o->size_given_klass(k);
     res = adjustObjectSize(res);
     assert(res != 0, "Block size should not be 0");
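All four hunks above fix the same race with the same pattern: read the free bit, read the size, then acquire and re-check the free bit before trusting the size. As a standalone illustration, here is a minimal analogue of that pattern using C++11 atomics in place of HotSpot's OrderAccess; the Chunk type and its fields are assumptions made for the sketch, not HotSpot code.

    #include <atomic>
    #include <cstddef>

    struct Chunk {
      std::atomic<bool>        free;  // the "free bit"
      std::atomic<std::size_t> size;  // only meaningful while free is set
    };

    // Returns the size if the chunk stayed free across both checks,
    // 0 if the caller must retry (the chunk was concurrently allocated).
    std::size_t read_free_size(const Chunk& c) {
      if (!c.free.load(std::memory_order_relaxed)) return 0;
      std::size_t s = c.size.load(std::memory_order_relaxed);
      // The acquire fence keeps the size load above from being reordered
      // past the re-check below; this is the role of OrderAccess::acquire()
      // in the patched block_size()/block_size_no_stall().
      std::atomic_thread_fence(std::memory_order_acquire);
      if (c.free.load(std::memory_order_relaxed)) return s;
      return 0;
    }

If the re-check still sees the free bit set, the size read in between cannot stem from a concurrent allocation, which is exactly the argument the new comments make.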
diff --git a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
@@ -115,7 +115,8 @@
 void G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
-                                                          oop new_val) {
+                                                          oop new_val,
+                                                          bool release) {
   volatile jbyte* byte = byte_for(field);
   if (*byte == g1_young_gen) {
     return;
diff --git a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
@@ -151,7 +151,7 @@
            G1SATBCardTableModRefBS::is_a(bsn);
   }

-  void write_ref_field_work(void* field, oop new_val);
+  void write_ref_field_work(void* field, oop new_val, bool release = false);

   // Can be called from static contexts.
   static void write_ref_field_static(void* field, oop new_val);
diff --git a/src/share/vm/memory/barrierSet.hpp b/src/share/vm/memory/barrierSet.hpp
--- a/src/share/vm/memory/barrierSet.hpp
+++ b/src/share/vm/memory/barrierSet.hpp
@@ -100,9 +100,9 @@
 public:

   // ...then the post-write version.
-  inline void write_ref_field(void* field, oop new_val);
+  inline void write_ref_field(void* field, oop new_val, bool release = false);

 protected:
-  virtual void write_ref_field_work(void* field, oop new_val) = 0;
+  virtual void write_ref_field_work(void* field, oop new_val, bool release = false) = 0;

 public:
   // Invoke the barrier, if any, necessary when writing the "bytes"-byte
diff --git a/src/share/vm/memory/barrierSet.inline.hpp b/src/share/vm/memory/barrierSet.inline.hpp
--- a/src/share/vm/memory/barrierSet.inline.hpp
+++ b/src/share/vm/memory/barrierSet.inline.hpp
@@ -40,11 +40,11 @@
   }
 }

-void BarrierSet::write_ref_field(void* field, oop new_val) {
+void BarrierSet::write_ref_field(void* field, oop new_val, bool release) {
   if (kind() == CardTableModRef) {
-    ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val);
+    ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val, release);
   } else {
-    write_ref_field_work(field, new_val);
+    write_ref_field_work(field, new_val, release);
   }
 }
diff --git a/src/share/vm/memory/cardTableModRefBS.cpp b/src/share/vm/memory/cardTableModRefBS.cpp
--- a/src/share/vm/memory/cardTableModRefBS.cpp
+++ b/src/share/vm/memory/cardTableModRefBS.cpp
@@ -419,8 +419,8 @@
 // Note that these versions are precise! The scanning code has to handle the
 // fact that the write barrier may be either precise or imprecise.

-void CardTableModRefBS::write_ref_field_work(void* field, oop newVal) {
-  inline_write_ref_field(field, newVal);
+void CardTableModRefBS::write_ref_field_work(void* field, oop newVal, bool release) {
+  inline_write_ref_field(field, newVal, release);
 }
diff --git a/src/share/vm/memory/cardTableModRefBS.hpp b/src/share/vm/memory/cardTableModRefBS.hpp
--- a/src/share/vm/memory/cardTableModRefBS.hpp
+++ b/src/share/vm/memory/cardTableModRefBS.hpp
@@ -292,7 +292,7 @@
   // these functions here for performance.
 protected:
   void write_ref_field_work(oop obj, size_t offset, oop newVal);
-  virtual void write_ref_field_work(void* field, oop newVal);
+  virtual void write_ref_field_work(void* field, oop newVal, bool release = false);
 public:

   bool has_write_ref_array_opt() { return true; }
@@ -324,9 +324,14 @@
   template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}

-  template <class T> inline void inline_write_ref_field(T* field, oop newVal) {
+  template <class T> inline void inline_write_ref_field(T* field, oop newVal, bool release) {
     jbyte* byte = byte_for((void*)field);
-    *byte = dirty_card;
+    if (release) {
+      // Perform a releasing store if requested.
+      OrderAccess::release_store((volatile jbyte*) byte, dirty_card);
+    } else {
+      *byte = dirty_card;
+    }
   }

   // These are used by G1, when it uses the card table as a temporary data
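The release parameter threaded through the barrier-set hierarchy above exists for one reader: CMS precleaning scans the card table while Java threads are still mutating the heap. A hedged sketch of the required store/load pairing, again with C++11 atomics standing in for OrderAccess and with invented names (heap_slot, card, kDirty):

    #include <atomic>
    #include <cstdint>

    std::atomic<void*>        heap_slot{nullptr};  // stands in for the oop field
    std::atomic<std::uint8_t> card{0};             // its card-table entry
    const std::uint8_t kDirty = 1;                 // illustrative dirty value

    void mutator_store(void* new_val) {
      heap_slot.store(new_val, std::memory_order_relaxed);
      // Release: the card must not appear dirty before new_val is visible,
      // mirroring OrderAccess::release_store() in inline_write_ref_field().
      card.store(kDirty, std::memory_order_release);
    }

    void* preclean_card() {
      // Acquire pairs with the release above: a thread that observes the
      // dirty card also observes new_val in the slot.
      if (card.load(std::memory_order_acquire) == kDirty) {
        return heap_slot.load(std::memory_order_relaxed);
      }
      return nullptr;
    }

Without the release, a PPC64 CPU may make the dirty card visible first; precleaning could then scan the slot and miss the new reference, the exact failure the oop.inline.hpp comment below describes.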
diff --git a/src/share/vm/memory/modRefBarrierSet.hpp b/src/share/vm/memory/modRefBarrierSet.hpp
--- a/src/share/vm/memory/modRefBarrierSet.hpp
+++ b/src/share/vm/memory/modRefBarrierSet.hpp
@@ -60,7 +60,7 @@
   void read_ref_field(void* field) {}
   void read_prim_field(HeapWord* field, size_t bytes) {}
 protected:
-  virtual void write_ref_field_work(void* field, oop new_val) = 0;
+  virtual void write_ref_field_work(void* field, oop new_val, bool release = false) = 0;
 public:
   void write_prim_field(HeapWord* field, size_t bytes, juint val1, juint val2) {}
diff --git a/src/share/vm/oops/cpCache.cpp b/src/share/vm/oops/cpCache.cpp
--- a/src/share/vm/oops/cpCache.cpp
+++ b/src/share/vm/oops/cpCache.cpp
@@ -363,7 +363,7 @@
   // Decode the action of set_method and set_interface_call
   Bytecodes::Code invoke_code = bytecode_1();
   if (invoke_code != (Bytecodes::Code)0) {
-    Metadata* f1 = (Metadata*)_f1;
+    Metadata* f1 = f1_ord();
     if (f1 != NULL) {
       switch (invoke_code) {
       case Bytecodes::_invokeinterface:
diff --git a/src/share/vm/oops/cpCache.hpp b/src/share/vm/oops/cpCache.hpp
--- a/src/share/vm/oops/cpCache.hpp
+++ b/src/share/vm/oops/cpCache.hpp
@@ -138,7 +138,7 @@
   void set_bytecode_1(Bytecodes::Code code);
   void set_bytecode_2(Bytecodes::Code code);
-  void set_f1(Metadata* f1)                            {
+  void set_f1(Metadata* f1) {
     Metadata* existing_f1 = (Metadata*)_f1; // read once
     assert(existing_f1 == NULL || existing_f1 == f1, "illegal field change");
     _f1 = f1;
@@ -325,14 +325,21 @@
   // Accessors
   int indices() const                      { return _indices; }
+  int indices_ord() const                  { return (intx)OrderAccess::load_ptr_acquire(&_indices); }
   int constant_pool_index() const          { return (indices() & cp_index_mask); }
-  Bytecodes::Code bytecode_1() const       { return Bytecodes::cast((indices() >> bytecode_1_shift) & bytecode_1_mask); }
-  Bytecodes::Code bytecode_2() const       { return Bytecodes::cast((indices() >> bytecode_2_shift) & bytecode_2_mask); }
-  Method* f1_as_method() const             { Metadata* f1 = (Metadata*)_f1; assert(f1 == NULL || f1->is_method(), ""); return (Method*)f1; }
-  Klass*  f1_as_klass() const              { Metadata* f1 = (Metadata*)_f1; assert(f1 == NULL || f1->is_klass(), ""); return (Klass*)f1; }
-  bool    is_f1_null() const               { Metadata* f1 = (Metadata*)_f1; return f1 == NULL; }  // classifies a CPC entry as unbound
+  Bytecodes::Code bytecode_1() const       { return Bytecodes::cast((indices_ord() >> bytecode_1_shift) & bytecode_1_mask); }
+  Bytecodes::Code bytecode_2() const       { return Bytecodes::cast((indices_ord() >> bytecode_2_shift) & bytecode_2_mask); }
+  Metadata* f1_ord() const                 { return (Metadata *)OrderAccess::load_ptr_acquire(&_f1); }
+  Method*   f1_as_method() const           { Metadata* f1 = f1_ord(); assert(f1 == NULL || f1->is_method(), ""); return (Method*)f1; }
+  Klass*    f1_as_klass() const            { Metadata* f1 = f1_ord(); assert(f1 == NULL || f1->is_klass(), ""); return (Klass*)f1; }
+  // Use the accessor f1_ord() to acquire _f1's value. This is needed for
+  // example in BytecodeInterpreter::run(), where is_f1_null() is
+  // called to check if an invokedynamic call is resolved. This load
+  // of _f1 must be ordered with the loads performed by
+  // cache->main_entry_index().
+  bool      is_f1_null() const             { Metadata* f1 = f1_ord(); return f1 == NULL; }  // classifies a CPC entry as unbound
   int  f2_as_index() const                 { assert(!is_vfinal(), ""); return (int) _f2; }
-  Method* f2_as_vfinal_method() const      { assert(is_vfinal(), ""); return (Method*)_f2; }
+  Method*   f2_as_vfinal_method() const    { assert(is_vfinal(), ""); return (Method*)_f2; }
   int  field_index() const                 { assert(is_field_entry(), ""); return (_flags & field_index_mask); }
   int  parameter_size() const              { assert(is_method_entry(), ""); return (_flags & parameter_size_mask); }
   bool is_volatile() const                 { return (_flags & (1 << is_volatile_shift)) != 0; }
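The cpCache changes are the reader half of a publication protocol: resolution fills in _f1/_f2 first and publishes the resolved bytecode in _indices last, so readers must acquire on _indices (and on _f1) before trusting what they find. A simplified standalone model of that protocol, with reduced types and only the fields needed for the sketch, not the real ConstantPoolCacheEntry:

    #include <atomic>
    #include <cstdint>

    struct Entry {
      std::atomic<std::intptr_t> indices{0};   // resolved bytecode ends up here
      std::atomic<void*>         f1{nullptr};  // resolved Metadata*
    };

    void resolve(Entry& e, void* metadata, std::intptr_t new_indices) {
      e.f1.store(metadata, std::memory_order_relaxed);
      // Publish last, with release: f1 becomes visible before the bytecode.
      e.indices.store(new_indices, std::memory_order_release);
    }

    void* lookup(Entry& e) {
      // Acquire loads, as in indices_ord()/f1_ord(): once the resolved
      // bytecode is seen, the f1 read below cannot return a stale NULL.
      if (e.indices.load(std::memory_order_acquire) == 0) {
        return nullptr;  // entry not yet resolved
      }
      return e.f1.load(std::memory_order_relaxed);
    }

This is why bytecode_1(), bytecode_2() and is_f1_null() now go through the acquiring accessors: a reader that observes the resolved state must not then read a pre-resolution _f1.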
diff --git a/src/share/vm/oops/instanceKlass.cpp b/src/share/vm/oops/instanceKlass.cpp
--- a/src/share/vm/oops/instanceKlass.cpp
+++ b/src/share/vm/oops/instanceKlass.cpp
@@ -1191,7 +1191,11 @@
     MutexLocker x(OopMapCacheAlloc_lock);
     // First time use. Allocate a cache in C heap
     if (_oop_map_cache == NULL) {
-      _oop_map_cache = new OopMapCache();
+      // Release stores from the OopMapCache constructor before the
+      // assignment to _oop_map_cache. C++ compilers on ppc do not emit
+      // the required memory barrier merely because of the volatile
+      // qualifier of _oop_map_cache.
+      OrderAccess::release_store_ptr(&_oop_map_cache, new OopMapCache());
     }
   }
   // _oop_map_cache is constant after init; lookup below does its own locking.
diff --git a/src/share/vm/oops/method.hpp b/src/share/vm/oops/method.hpp
--- a/src/share/vm/oops/method.hpp
+++ b/src/share/vm/oops/method.hpp
@@ -350,16 +350,21 @@
   }

   void set_method_data(MethodData* data) {
-    _method_data = data;
+    // The store into the method must be released. On platforms without
+    // total store order (TSO) the reference may otherwise become
+    // visible before the initialization of data.
+    OrderAccess::release_store_ptr((volatile void *)&_method_data, data);
   }

   MethodCounters* method_counters() const {
     return _method_counters;
   }

   void set_method_counters(MethodCounters* counters) {
-    _method_counters = counters;
+    // The store into the method must be released. On platforms without
+    // total store order (TSO) the reference may otherwise become
+    // visible before the initialization of counters.
+    OrderAccess::release_store_ptr((volatile void *)&_method_counters, counters);
   }

 #ifdef TIERED
diff --git a/src/share/vm/oops/oop.inline.hpp b/src/share/vm/oops/oop.inline.hpp
--- a/src/share/vm/oops/oop.inline.hpp
+++ b/src/share/vm/oops/oop.inline.hpp
@@ -490,9 +490,9 @@
   return size_given_klass(klass());
 }

-inline void update_barrier_set(void* p, oop v) {
+inline void update_barrier_set(void* p, oop v, bool release = false) {
   assert(oopDesc::bs() != NULL, "Uninitialized bs in oop!");
-  oopDesc::bs()->write_ref_field(p, v);
+  oopDesc::bs()->write_ref_field(p, v, release);
 }

 template <class T> inline void update_barrier_set_pre(T* p, oop v) {
@@ -505,7 +505,10 @@
   } else {
     update_barrier_set_pre(p, v);
     oopDesc::encode_store_heap_oop(p, v);
-    update_barrier_set((void*)p, v);  // cast away type
+    // always_do_update_barrier == false =>
+    // Either we are at a safepoint (in GC) or CMS is not used. In both
+    // cases it is unnecessary to mark the card as dirty with release
+    // semantics.
+    update_barrier_set((void*)p, v, false /* release */);  // cast away type
   }
 }

@@ -513,7 +516,12 @@
   update_barrier_set_pre((T*)p, v);  // cast away volatile
   // Used by release_obj_field_put, so use release_store_ptr.
   oopDesc::release_encode_store_heap_oop(p, v);
-  update_barrier_set((void*)p, v);  // cast away type
+  // When using CMS we must mark the card corresponding to p as dirty
+  // with release semantics to prevent CMS from seeing the dirty card
+  // but not the new value v at p due to reordering of the two
+  // stores. Note that CMS has a concurrent precleaning phase, where
+  // it reads the card table while the Java threads are running.
+  update_barrier_set((void*)p, v, true /* release */);  // cast away type
 }

 // Should replace *addr = oop assignments where addr type depends on UseCompressedOops
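The instanceKlass.cpp and method.hpp hunks are all the same idiom: initialize an object completely, then release-store the pointer so no thread can observe the pointer without also observing the initialized fields. Below is a minimal double-checked-locking analogue of the _oop_map_cache fix; Cache, its contents and the lock are invented for illustration:

    #include <atomic>
    #include <mutex>

    struct Cache {
      int table[64];
      Cache() { for (int& e : table) e = -1; }  // constructor stores
    };

    std::atomic<Cache*> g_cache{nullptr};
    std::mutex          g_cache_lock;

    Cache* get_cache() {
      Cache* c = g_cache.load(std::memory_order_acquire);
      if (c == nullptr) {
        std::lock_guard<std::mutex> guard(g_cache_lock);
        c = g_cache.load(std::memory_order_relaxed);
        if (c == nullptr) {
          c = new Cache();
          // Release: the constructor's stores become visible before the
          // pointer does, which is the effect of
          // OrderAccess::release_store_ptr() in the patch. A plain store
          // to a volatile field gives no such guarantee on PPC.
          g_cache.store(c, std::memory_order_release);
        }
      }
      return c;
    }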
In both + // cases it's unnecessary to mark the card as dirty with release sematics. + update_barrier_set((void*)p, v, false /* release */); // cast away type } } @@ -513,7 +516,12 @@ update_barrier_set_pre((T*)p, v); // cast away volatile // Used by release_obj_field_put, so use release_store_ptr. oopDesc::release_encode_store_heap_oop(p, v); - update_barrier_set((void*)p, v); // cast away type + // When using CMS we must mark the card corresponding to p as dirty + // with release sematics to prevent that CMS sees the dirty card but + // not the new value v at p due to reordering of the two + // stores. Note that CMS has a concurrent precleaning phase, where + // it reads the card table while the Java threads are running. + update_barrier_set((void*)p, v, true /* release */); // cast away type } // Should replace *addr = oop assignments where addr type depends on UseCompressedOops diff --git a/src/share/vm/runtime/biasedLocking.cpp b/src/share/vm/runtime/biasedLocking.cpp --- a/src/share/vm/runtime/biasedLocking.cpp +++ b/src/share/vm/runtime/biasedLocking.cpp @@ -233,8 +233,10 @@ // Fix up highest lock to contain displaced header and point // object at it highest_lock->set_displaced_header(unbiased_prototype); - // Reset object header to point to displaced mark - obj->set_mark(markOopDesc::encode(highest_lock)); + // Reset object header to point to displaced mark. + // Must release storing the lock address for platforms without TSO + // ordering (e.g. ppc). + obj->release_set_mark(markOopDesc::encode(highest_lock)); assert(!obj->mark()->has_bias_pattern(), "illegal mark state: stack lock used bias bit"); if (TraceBiasedLocking && (Verbose || !is_bulk)) { tty->print_cr(" Revoked bias of currently-locked object"); diff --git a/src/share/vm/runtime/sweeper.cpp b/src/share/vm/runtime/sweeper.cpp --- a/src/share/vm/runtime/sweeper.cpp +++ b/src/share/vm/runtime/sweeper.cpp @@ -297,7 +297,8 @@ _bytes_changed = 0; } } - _sweep_started = 0; + // Release work, because another compiler thread could continue. + OrderAccess::release_store((int*)&_sweep_started, 0); } } diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp +++ b/src/share/vm/runtime/thread.hpp @@ -1044,8 +1044,14 @@ address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support - JavaThreadState thread_state() const { return _thread_state; } - void set_thread_state(JavaThreadState s) { _thread_state=s; } + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + JavaThreadState thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } + void set_thread_state(JavaThreadState s) { + OrderAccess::release_store((volatile jint*)&_thread_state, (jint)s); + } ThreadSafepointState *safepoint_state() const { return _safepoint_state; } void set_safepoint_state(ThreadSafepointState *state) { _safepoint_state = state; } bool is_at_poll_safepoint() { return _safepoint_state->is_at_poll_safepoint(); }