# HG changeset patch
# User goetz
# Date 1385976374 -3600
# Node ID 86af4160ce9fdae2bba8649d09c86835ebcd84b3
# Parent  41b780b43b7485b23d8ea7e906f412955935cb87
8029396: PPC64 (part 212): Several memory ordering fixes in C-code.

diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -997,6 +997,13 @@
   if (FreeChunk::indicatesFreeChunk(p)) {
     volatile FreeChunk* fc = (volatile FreeChunk*)p;
     size_t res = fc->size();
+
+    // Bugfix for systems with weak memory model (PPC64/IA64). The
+    // block's free bit was set and we have read the size of the
+    // block. Acquire and check the free bit again. If the block is
+    // still free, the read size is correct.
+    OrderAccess::acquire();
+
     // If the object is still a free chunk, return the size, else it
     // has been allocated so try again.
     if (FreeChunk::indicatesFreeChunk(p)) {
@@ -1010,6 +1017,12 @@
     assert(k->is_klass(), "Should really be klass oop.");
     oop o = (oop)p;
     assert(o->is_oop(true /* ignore mark word */), "Should be an oop.");
+
+    // Bugfix for systems with weak memory model (PPC64/IA64).
+    // The object o may be an array. Acquire to make sure that the array
+    // size (third word) is consistent.
+    OrderAccess::acquire();
+
     size_t res = o->size_given_klass(k);
     res = adjustObjectSize(res);
     assert(res != 0, "Block size should not be 0");
@@ -1040,6 +1053,13 @@
   if (FreeChunk::indicatesFreeChunk(p)) {
     volatile FreeChunk* fc = (volatile FreeChunk*)p;
     size_t res = fc->size();
+
+    // Bugfix for systems with weak memory model (PPC64/IA64). The
+    // free bit of the block was set and we have read the size of
+    // the block. Acquire and check the free bit again. If the
+    // block is still free, the read size is correct.
+    OrderAccess::acquire();
+
     if (FreeChunk::indicatesFreeChunk(p)) {
       assert(res != 0, "Block size should not be 0");
       assert(loops == 0, "Should be 0");
@@ -1055,6 +1075,12 @@
     assert(k->is_klass(), "Should really be klass oop.");
     oop o = (oop)p;
     assert(o->is_oop(), "Should be an oop");
+
+    // Bugfix for systems with weak memory model (PPC64/IA64).
+    // The object o may be an array. Acquire to make sure that the array
+    // size (third word) is consistent.
+    OrderAccess::acquire();
+
     size_t res = o->size_given_klass(k);
     res = adjustObjectSize(res);
     assert(res != 0, "Block size should not be 0");
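All four hunks above fix the same race with the same pattern: read the free bit, read the size, then acquire and re-check the free bit before trusting the size. As a standalone illustration, here is a minimal analogue of that pattern using C++11 atomics in place of HotSpot's OrderAccess; the Chunk type and its fields are assumptions made for the sketch, not HotSpot code.

    #include <atomic>
    #include <cstddef>

    struct Chunk {
      std::atomic<bool>        free;  // the "free bit"
      std::atomic<std::size_t> size;  // only meaningful while free is set
    };

    // Returns the size if the chunk stayed free across both checks,
    // 0 if the caller must retry (the chunk was concurrently allocated).
    std::size_t read_free_size(const Chunk& c) {
      if (!c.free.load(std::memory_order_relaxed)) return 0;
      std::size_t s = c.size.load(std::memory_order_relaxed);
      // The acquire fence keeps the size load above from being reordered
      // past the re-check below; this is the role of OrderAccess::acquire()
      // in the patched block_size()/block_size_no_stall().
      std::atomic_thread_fence(std::memory_order_acquire);
      if (c.free.load(std::memory_order_relaxed)) return s;
      return 0;
    }

If the re-check still sees the free bit set, the size read in between cannot stem from a concurrent allocation, which is exactly the argument the new comments make.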
diff --git a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
@@ -115,7 +115,8 @@
 void G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
-                                                          oop new_val) {
+                                                          oop new_val,
+                                                          bool release) {
   volatile jbyte* byte = byte_for(field);
   if (*byte == g1_young_gen) {
     return;
diff --git a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
@@ -151,7 +151,7 @@
            G1SATBCardTableModRefBS::is_a(bsn);
   }

-  void write_ref_field_work(void* field, oop new_val);
+  void write_ref_field_work(void* field, oop new_val, bool release = false);

   // Can be called from static contexts.
   static void write_ref_field_static(void* field, oop new_val);
diff --git a/src/share/vm/memory/barrierSet.hpp b/src/share/vm/memory/barrierSet.hpp
--- a/src/share/vm/memory/barrierSet.hpp
+++ b/src/share/vm/memory/barrierSet.hpp
@@ -100,9 +100,9 @@
 public:

   // ...then the post-write version.
-  inline void write_ref_field(void* field, oop new_val);
+  inline void write_ref_field(void* field, oop new_val, bool release = false);

 protected:
-  virtual void write_ref_field_work(void* field, oop new_val) = 0;
+  virtual void write_ref_field_work(void* field, oop new_val, bool release = false) = 0;

 public:
   // Invoke the barrier, if any, necessary when writing the "bytes"-byte
diff --git a/src/share/vm/memory/barrierSet.inline.hpp b/src/share/vm/memory/barrierSet.inline.hpp
--- a/src/share/vm/memory/barrierSet.inline.hpp
+++ b/src/share/vm/memory/barrierSet.inline.hpp
@@ -40,11 +40,11 @@
   }
 }

-void BarrierSet::write_ref_field(void* field, oop new_val) {
+void BarrierSet::write_ref_field(void* field, oop new_val, bool release) {
   if (kind() == CardTableModRef) {
-    ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val);
+    ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val, release);
   } else {
-    write_ref_field_work(field, new_val);
+    write_ref_field_work(field, new_val, release);
   }
 }
diff --git a/src/share/vm/memory/cardTableModRefBS.cpp b/src/share/vm/memory/cardTableModRefBS.cpp
--- a/src/share/vm/memory/cardTableModRefBS.cpp
+++ b/src/share/vm/memory/cardTableModRefBS.cpp
@@ -419,8 +419,8 @@
 // Note that these versions are precise! The scanning code has to handle the
 // fact that the write barrier may be either precise or imprecise.

-void CardTableModRefBS::write_ref_field_work(void* field, oop newVal) {
-  inline_write_ref_field(field, newVal);
+void CardTableModRefBS::write_ref_field_work(void* field, oop newVal, bool release) {
+  inline_write_ref_field(field, newVal, release);
 }
diff --git a/src/share/vm/memory/cardTableModRefBS.hpp b/src/share/vm/memory/cardTableModRefBS.hpp
--- a/src/share/vm/memory/cardTableModRefBS.hpp
+++ b/src/share/vm/memory/cardTableModRefBS.hpp
@@ -292,7 +292,7 @@
   // these functions here for performance.
 protected:
   void write_ref_field_work(oop obj, size_t offset, oop newVal);
-  virtual void write_ref_field_work(void* field, oop newVal);
+  virtual void write_ref_field_work(void* field, oop newVal, bool release = false);
 public:

   bool has_write_ref_array_opt() { return true; }
@@ -324,9 +324,14 @@
   template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}

-  template <class T> inline void inline_write_ref_field(T* field, oop newVal) {
+  template <class T> inline void inline_write_ref_field(T* field, oop newVal, bool release) {
     jbyte* byte = byte_for((void*)field);
-    *byte = dirty_card;
+    if (release) {
+      // Perform a releasing store if requested.
+      OrderAccess::release_store((volatile jbyte*) byte, dirty_card);
+    } else {
+      *byte = dirty_card;
+    }
   }

   // These are used by G1, when it uses the card table as a temporary data
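The release parameter threaded through the barrier-set hierarchy above exists for one reader: CMS precleaning scans the card table while Java threads are still mutating the heap. A hedged sketch of the required store/load pairing, again with C++11 atomics standing in for OrderAccess and with invented names (heap_slot, card, kDirty):

    #include <atomic>
    #include <cstdint>

    std::atomic<void*>        heap_slot{nullptr};  // stands in for the oop field
    std::atomic<std::uint8_t> card{0};             // its card-table entry
    const std::uint8_t kDirty = 1;                 // illustrative dirty value

    void mutator_store(void* new_val) {
      heap_slot.store(new_val, std::memory_order_relaxed);
      // Release: the card must not appear dirty before new_val is visible,
      // mirroring OrderAccess::release_store() in inline_write_ref_field().
      card.store(kDirty, std::memory_order_release);
    }

    void* preclean_card() {
      // Acquire pairs with the release above: a thread that observes the
      // dirty card also observes new_val in the slot.
      if (card.load(std::memory_order_acquire) == kDirty) {
        return heap_slot.load(std::memory_order_relaxed);
      }
      return nullptr;
    }

Without the release, a PPC64 CPU may make the dirty card visible first; precleaning could then scan the slot and miss the new reference, the exact failure the oop.inline.hpp comment below describes.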
diff --git a/src/share/vm/memory/modRefBarrierSet.hpp b/src/share/vm/memory/modRefBarrierSet.hpp
--- a/src/share/vm/memory/modRefBarrierSet.hpp
+++ b/src/share/vm/memory/modRefBarrierSet.hpp
@@ -60,7 +60,7 @@
   void read_ref_field(void* field) {}
   void read_prim_field(HeapWord* field, size_t bytes) {}
 protected:
-  virtual void write_ref_field_work(void* field, oop new_val) = 0;
+  virtual void write_ref_field_work(void* field, oop new_val, bool release = false) = 0;
 public:
   void write_prim_field(HeapWord* field, size_t bytes, juint val1, juint val2) {}
diff --git a/src/share/vm/oops/cpCache.cpp b/src/share/vm/oops/cpCache.cpp
--- a/src/share/vm/oops/cpCache.cpp
+++ b/src/share/vm/oops/cpCache.cpp
@@ -363,7 +363,7 @@
   // Decode the action of set_method and set_interface_call
   Bytecodes::Code invoke_code = bytecode_1();
   if (invoke_code != (Bytecodes::Code)0) {
-    Metadata* f1 = (Metadata*)_f1;
+    Metadata* f1 = f1_ord();
     if (f1 != NULL) {
       switch (invoke_code) {
       case Bytecodes::_invokeinterface:
diff --git a/src/share/vm/oops/cpCache.hpp b/src/share/vm/oops/cpCache.hpp
--- a/src/share/vm/oops/cpCache.hpp
+++ b/src/share/vm/oops/cpCache.hpp
@@ -138,7 +138,7 @@
   void set_bytecode_1(Bytecodes::Code code);
   void set_bytecode_2(Bytecodes::Code code);
-  void set_f1(Metadata* f1)                            {
+  void set_f1(Metadata* f1) {
     Metadata* existing_f1 = (Metadata*)_f1; // read once
     assert(existing_f1 == NULL || existing_f1 == f1, "illegal field change");
     _f1 = f1;
@@ -325,14 +325,21 @@
   // Accessors
   int indices() const                      { return _indices; }
+  int indices_ord() const                  { return (intx)OrderAccess::load_ptr_acquire(&_indices); }
   int constant_pool_index() const          { return (indices() & cp_index_mask); }
-  Bytecodes::Code bytecode_1() const       { return Bytecodes::cast((indices() >> bytecode_1_shift) & bytecode_1_mask); }
-  Bytecodes::Code bytecode_2() const       { return Bytecodes::cast((indices() >> bytecode_2_shift) & bytecode_2_mask); }
-  Method* f1_as_method() const             { Metadata* f1 = (Metadata*)_f1; assert(f1 == NULL || f1->is_method(), ""); return (Method*)f1; }
-  Klass*  f1_as_klass() const              { Metadata* f1 = (Metadata*)_f1; assert(f1 == NULL || f1->is_klass(), ""); return (Klass*)f1; }
-  bool    is_f1_null() const               { Metadata* f1 = (Metadata*)_f1; return f1 == NULL; }  // classifies a CPC entry as unbound
+  Bytecodes::Code bytecode_1() const       { return Bytecodes::cast((indices_ord() >> bytecode_1_shift) & bytecode_1_mask); }
+  Bytecodes::Code bytecode_2() const       { return Bytecodes::cast((indices_ord() >> bytecode_2_shift) & bytecode_2_mask); }
+  Metadata* f1_ord() const                 { return (Metadata *)OrderAccess::load_ptr_acquire(&_f1); }
+  Method*   f1_as_method() const           { Metadata* f1 = f1_ord(); assert(f1 == NULL || f1->is_method(), ""); return (Method*)f1; }
+  Klass*    f1_as_klass() const            { Metadata* f1 = f1_ord(); assert(f1 == NULL || f1->is_klass(), ""); return (Klass*)f1; }
+  // Use the accessor f1_ord() to acquire _f1's value. This is needed for
+  // example in BytecodeInterpreter::run(), where is_f1_null() is
+  // called to check if an invokedynamic call is resolved. This load
+  // of _f1 must be ordered with the loads performed by
+  // cache->main_entry_index().
+  bool      is_f1_null() const             { Metadata* f1 = f1_ord(); return f1 == NULL; }  // classifies a CPC entry as unbound
   int  f2_as_index() const                 { assert(!is_vfinal(), ""); return (int) _f2; }
-  Method* f2_as_vfinal_method() const      { assert(is_vfinal(), ""); return (Method*)_f2; }
+  Method*   f2_as_vfinal_method() const    { assert(is_vfinal(), ""); return (Method*)_f2; }
   int  field_index() const                 { assert(is_field_entry(), ""); return (_flags & field_index_mask); }
   int  parameter_size() const              { assert(is_method_entry(), ""); return (_flags & parameter_size_mask); }
   bool is_volatile() const                 { return (_flags & (1 << is_volatile_shift)) != 0; }
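The cpCache changes are the reader half of a publication protocol: resolution fills in _f1/_f2 first and publishes the resolved bytecode in _indices last, so readers must acquire on _indices (and on _f1) before trusting what they find. A simplified standalone model of that protocol, with reduced types and only the fields needed for the sketch, not the real ConstantPoolCacheEntry:

    #include <atomic>
    #include <cstdint>

    struct Entry {
      std::atomic<std::intptr_t> indices{0};   // resolved bytecode ends up here
      std::atomic<void*>         f1{nullptr};  // resolved Metadata*
    };

    void resolve(Entry& e, void* metadata, std::intptr_t new_indices) {
      e.f1.store(metadata, std::memory_order_relaxed);
      // Publish last, with release: f1 becomes visible before the bytecode.
      e.indices.store(new_indices, std::memory_order_release);
    }

    void* lookup(Entry& e) {
      // Acquire loads, as in indices_ord()/f1_ord(): once the resolved
      // bytecode is seen, the f1 read below cannot return a stale NULL.
      if (e.indices.load(std::memory_order_acquire) == 0) {
        return nullptr;  // entry not yet resolved
      }
      return e.f1.load(std::memory_order_relaxed);
    }

This is why bytecode_1(), bytecode_2() and is_f1_null() now go through the acquiring accessors: a reader that observes the resolved state must not then read a pre-resolution _f1.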
diff --git a/src/share/vm/oops/instanceKlass.cpp b/src/share/vm/oops/instanceKlass.cpp
--- a/src/share/vm/oops/instanceKlass.cpp
+++ b/src/share/vm/oops/instanceKlass.cpp
@@ -1191,7 +1191,11 @@
     MutexLocker x(OopMapCacheAlloc_lock);
     // First time use. Allocate a cache in C heap
     if (_oop_map_cache == NULL) {
-      _oop_map_cache = new OopMapCache();
+      // Release stores from the OopMapCache constructor before the
+      // assignment to _oop_map_cache. C++ compilers on ppc do not emit
+      // the required memory barrier merely because of the volatile
+      // qualifier of _oop_map_cache.
+      OrderAccess::release_store_ptr(&_oop_map_cache, new OopMapCache());
     }
   }
   // _oop_map_cache is constant after init; lookup below does its own locking.
diff --git a/src/share/vm/oops/method.hpp b/src/share/vm/oops/method.hpp
--- a/src/share/vm/oops/method.hpp
+++ b/src/share/vm/oops/method.hpp
@@ -350,16 +350,21 @@
   }

   void set_method_data(MethodData* data) {
-    _method_data = data;
+    // The store into the method must be released. On platforms without
+    // total store order (TSO) the reference may otherwise become
+    // visible before the initialization of data.
+    OrderAccess::release_store_ptr((volatile void *)&_method_data, data);
   }

   MethodCounters* method_counters() const {
     return _method_counters;
   }

   void set_method_counters(MethodCounters* counters) {
-    _method_counters = counters;
+    // The store into the method must be released. On platforms without
+    // total store order (TSO) the reference may otherwise become
+    // visible before the initialization of counters.
+    OrderAccess::release_store_ptr((volatile void *)&_method_counters, counters);
   }

 #ifdef TIERED
diff --git a/src/share/vm/oops/oop.inline.hpp b/src/share/vm/oops/oop.inline.hpp
--- a/src/share/vm/oops/oop.inline.hpp
+++ b/src/share/vm/oops/oop.inline.hpp
@@ -490,9 +490,9 @@
   return size_given_klass(klass());
 }

-inline void update_barrier_set(void* p, oop v) {
+inline void update_barrier_set(void* p, oop v, bool release = false) {
   assert(oopDesc::bs() != NULL, "Uninitialized bs in oop!");
-  oopDesc::bs()->write_ref_field(p, v);
+  oopDesc::bs()->write_ref_field(p, v, release);
 }

 template <class T> inline void update_barrier_set_pre(T* p, oop v) {
@@ -505,7 +505,10 @@
   } else {
     update_barrier_set_pre(p, v);
     oopDesc::encode_store_heap_oop(p, v);
-    update_barrier_set((void*)p, v);  // cast away type
+    // always_do_update_barrier == false =>
+    // Either we are at a safepoint (in GC) or CMS is not used. In both
+    // cases it is unnecessary to mark the card as dirty with release
+    // semantics.
+    update_barrier_set((void*)p, v, false /* release */);  // cast away type
   }
 }

@@ -513,7 +516,12 @@
   update_barrier_set_pre((T*)p, v);  // cast away volatile
   // Used by release_obj_field_put, so use release_store_ptr.
   oopDesc::release_encode_store_heap_oop(p, v);
-  update_barrier_set((void*)p, v);  // cast away type
+  // When using CMS we must mark the card corresponding to p as dirty
+  // with release semantics to prevent CMS from seeing the dirty card
+  // but not the new value v at p due to reordering of the two
+  // stores. Note that CMS has a concurrent precleaning phase, where
+  // it reads the card table while the Java threads are running.
+  update_barrier_set((void*)p, v, true /* release */);  // cast away type
 }

 // Should replace *addr = oop assignments where addr type depends on UseCompressedOops
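The instanceKlass.cpp and method.hpp hunks are all the same idiom: initialize an object completely, then release-store the pointer so no thread can observe the pointer without also observing the initialized fields. Below is a minimal double-checked-locking analogue of the _oop_map_cache fix; Cache, its contents and the lock are invented for illustration:

    #include <atomic>
    #include <mutex>

    struct Cache {
      int table[64];
      Cache() { for (int& e : table) e = -1; }  // constructor stores
    };

    std::atomic<Cache*> g_cache{nullptr};
    std::mutex          g_cache_lock;

    Cache* get_cache() {
      Cache* c = g_cache.load(std::memory_order_acquire);
      if (c == nullptr) {
        std::lock_guard<std::mutex> guard(g_cache_lock);
        c = g_cache.load(std::memory_order_relaxed);
        if (c == nullptr) {
          c = new Cache();
          // Release: the constructor's stores become visible before the
          // pointer does, which is the effect of
          // OrderAccess::release_store_ptr() in the patch. A plain store
          // to a volatile field gives no such guarantee on PPC.
          g_cache.store(c, std::memory_order_release);
        }
      }
      return c;
    }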
In both + // cases it's unnecessary to mark the card as dirty with release sematics. + update_barrier_set((void*)p, v, false /* release */); // cast away type } } @@ -513,7 +516,12 @@ update_barrier_set_pre((T*)p, v); // cast away volatile // Used by release_obj_field_put, so use release_store_ptr. oopDesc::release_encode_store_heap_oop(p, v); - update_barrier_set((void*)p, v); // cast away type + // When using CMS we must mark the card corresponding to p as dirty + // with release sematics to prevent that CMS sees the dirty card but + // not the new value v at p due to reordering of the two + // stores. Note that CMS has a concurrent precleaning phase, where + // it reads the card table while the Java threads are running. + update_barrier_set((void*)p, v, true /* release */); // cast away type } // Should replace *addr = oop assignments where addr type depends on UseCompressedOops diff --git a/src/share/vm/runtime/biasedLocking.cpp b/src/share/vm/runtime/biasedLocking.cpp --- a/src/share/vm/runtime/biasedLocking.cpp +++ b/src/share/vm/runtime/biasedLocking.cpp @@ -233,8 +233,10 @@ // Fix up highest lock to contain displaced header and point // object at it highest_lock->set_displaced_header(unbiased_prototype); - // Reset object header to point to displaced mark - obj->set_mark(markOopDesc::encode(highest_lock)); + // Reset object header to point to displaced mark. + // Must release storing the lock address for platforms without TSO + // ordering (e.g. ppc). + obj->release_set_mark(markOopDesc::encode(highest_lock)); assert(!obj->mark()->has_bias_pattern(), "illegal mark state: stack lock used bias bit"); if (TraceBiasedLocking && (Verbose || !is_bulk)) { tty->print_cr(" Revoked bias of currently-locked object"); diff --git a/src/share/vm/runtime/sweeper.cpp b/src/share/vm/runtime/sweeper.cpp --- a/src/share/vm/runtime/sweeper.cpp +++ b/src/share/vm/runtime/sweeper.cpp @@ -297,7 +297,8 @@ _bytes_changed = 0; } } - _sweep_started = 0; + // Release work, because another compiler thread could continue. + OrderAccess::release_store((int*)&_sweep_started, 0); } } diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp +++ b/src/share/vm/runtime/thread.hpp @@ -1044,8 +1044,14 @@ address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support - JavaThreadState thread_state() const { return _thread_state; } - void set_thread_state(JavaThreadState s) { _thread_state=s; } + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + JavaThreadState thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } + void set_thread_state(JavaThreadState s) { + OrderAccess::release_store((volatile jint*)&_thread_state, (jint)s); + } ThreadSafepointState *safepoint_state() const { return _safepoint_state; } void set_safepoint_state(ThreadSafepointState *state) { _safepoint_state = state; } bool is_at_poll_safepoint() { return _safepoint_state->is_at_poll_safepoint(); }