# HG changeset patch
# User eosterlund
# Date 1431448445 -3600
#      Tue May 12 17:34:05 2015 +0100
# Node ID a2088c7efd104af99d6895e0bd868e791e93b852
# Parent  610dc20a7bc8add7f85ee78f069928282ae41e3a
CMS fence eliding using fancy synchronization when using UseCondCardMark

diff --git a/src/os/aix/vm/os_aix.cpp b/src/os/aix/vm/os_aix.cpp
--- a/src/os/aix/vm/os_aix.cpp
+++ b/src/os/aix/vm/os_aix.cpp
@@ -3683,6 +3683,12 @@
   return JNI_OK;
 }
 
+void *os::alloc_memory_serialize_page() {
+  address mem_serialize_page = (address) ::mmap(NULL, Aix::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
+  return mem_serialize_page;
+}
+
 // Mark the polling page as unreadable
 void os::make_polling_page_unreadable(void) {
   if (!guard_memory((char*)_polling_page, Aix::page_size())) {
diff --git a/src/os/bsd/vm/os_bsd.cpp b/src/os/bsd/vm/os_bsd.cpp
--- a/src/os/bsd/vm/os_bsd.cpp
+++ b/src/os/bsd/vm/os_bsd.cpp
@@ -3632,6 +3632,12 @@
   return JNI_OK;
 }
 
+void *os::alloc_memory_serialize_page() {
+  address mem_serialize_page = (address) ::mmap(NULL, Bsd::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
+  return mem_serialize_page;
+}
+
 // Mark the polling page as unreadable
 void os::make_polling_page_unreadable(void) {
   if (!guard_memory((char*)_polling_page, Bsd::page_size())) {
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp
+++ b/src/os/linux/vm/os_linux.cpp
@@ -4859,6 +4859,12 @@
   return JNI_OK;
 }
 
+void *os::alloc_memory_serialize_page() {
+  address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page");
+  return mem_serialize_page;
+}
+
 // Mark the polling page as unreadable
 void os::make_polling_page_unreadable(void) {
   if (!guard_memory((char*)_polling_page, Linux::page_size())) {
diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp
--- a/src/os/solaris/vm/os_solaris.cpp
+++ b/src/os/solaris/vm/os_solaris.cpp
@@ -4770,6 +4770,12 @@
   return JNI_OK;
 }
 
+void *os::alloc_memory_serialize_page() {
+  address mem_serialize_page = (address)Solaris::mmap_chunk(NULL, page_size, MAP_PRIVATE, PROT_READ | PROT_WRITE);
+  guarantee(mem_serialize_page != NULL, "mmap Failed for memory serialize page");
+  return mem_serialize_page;
+}
+
 // Mark the polling page as unreadable
 void os::make_polling_page_unreadable(void) {
   if (mprotect((char *)_polling_page, page_size, PROT_NONE) != 0) {
diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp
--- a/src/os/windows/vm/os_windows.cpp
+++ b/src/os/windows/vm/os_windows.cpp
@@ -4163,6 +4163,15 @@
   return JNI_OK;
 }
 
+void *os::alloc_memory_serialize_page() {
+  address mem_serialize_page = (address)VirtualAlloc(NULL, os::vm_page_size(), MEM_RESERVE, PAGE_READWRITE);
+  guarantee(mem_serialize_page != NULL, "Reserve Failed for memory serialize page");
+
+  address return_page = (address)VirtualAlloc(mem_serialize_page, os::vm_page_size(), MEM_COMMIT, PAGE_READWRITE);
+  guarantee(return_page != NULL, "Commit Failed for memory serialize page");
+  return mem_serialize_page;
+}
+
 // Mark the polling page as unreadable
 void os::make_polling_page_unreadable(void) {
   DWORD old_status;
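For reference, the POSIX hooks above all follow the same pattern: map one anonymous, private, read/write page and treat MAP_FAILED (not NULL) as the failure sentinel, which is why the AIX guarantee checks MAP_FAILED like the BSD and Linux versions do. A minimal standalone sketch of that pattern, outside HotSpot and with a hypothetical main(), for illustration only:

// Standalone illustration of the allocation pattern; not part of the patch.
#include <sys/mman.h>
#include <unistd.h>
#include <cassert>
#include <cstdio>

static void* alloc_serialize_page_demo() {
  size_t page_size = (size_t)sysconf(_SC_PAGESIZE);
  // One anonymous, private, readable and writable page.
  void* page = ::mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap reports failure with MAP_FAILED ((void*)-1), never NULL.
  assert(page != MAP_FAILED);
  return page;
}

int main() {
  std::printf("serialize page at %p\n", alloc_serialize_page_demo());
  return 0;
}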
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -4098,6 +4098,13 @@
     // We'll scan the cards in the dirty region (with periodic
     // yields for foreground GC as needed).
     if (!dirtyRegion.is_empty()) {
+      if (UseCondCardMark) {
+        // This global fence serializes all stale memory accesses in the process,
+        // allowing fence elision in the mutator write barrier.
+        // It protects against reading stale references after writing the precleaned card value.
+        OrderAccess::global_fence();
+      }
+
       assert(numDirtyCards > 0, "consistency check");
       HeapWord* stop_point = NULL;
       stopTimer();
@@ -4189,6 +4196,13 @@
         dirtyRegion.word_size()/CardTableModRefBS::card_size_in_words;
 
     if (!dirtyRegion.is_empty()) {
+      if (UseCondCardMark) {
+        // This global fence serializes all stale memory accesses in the process,
+        // allowing fence elision in the mutator write barrier.
+        // It protects against reading stale references after writing the precleaned card value.
+        OrderAccess::global_fence();
+      }
+
       stopTimer();
       CMSTokenSyncWithLocks ts(true, gen->freelistLock(), bitMapLock());
       startTimer();
diff --git a/src/share/vm/runtime/orderAccess.cpp b/src/share/vm/runtime/orderAccess.cpp
--- a/src/share/vm/runtime/orderAccess.cpp
+++ b/src/share/vm/runtime/orderAccess.cpp
@@ -38,3 +38,13 @@
   }
   assert(Threads::number_of_threads() == 0, "for bootstrap only");
 }
+
+void OrderAccess::general_global_fence() {
+  Thread *thread = Thread::current();
+  volatile int *serialization_page = (volatile int*)thread->get_serialization_page();
+  *serialization_page = 0;
+  os::protect_memory((char *)serialization_page,
+                     os::vm_page_size(), os::MEM_PROT_READ);
+  os::protect_memory((char *)serialization_page,
+                     os::vm_page_size(), os::MEM_PROT_RW);
+}
diff --git a/src/share/vm/runtime/orderAccess.hpp b/src/share/vm/runtime/orderAccess.hpp
--- a/src/share/vm/runtime/orderAccess.hpp
+++ b/src/share/vm/runtime/orderAccess.hpp
@@ -264,6 +264,9 @@
   static void     release();
   static void     fence();
 
+  // flushes all pending memory accesses, even on remote CPUs
+  static void     global_fence();
+
   static jbyte    load_acquire(volatile jbyte*   p);
   static jshort   load_acquire(volatile jshort*  p);
   static jint     load_acquire(volatile jint*    p);
@@ -318,6 +321,10 @@
   template<typename T> static void specialized_release_store      (volatile T* p, T v);
   template<typename T> static void specialized_release_store_fence(volatile T* p, T v);
 
+  template<typename T>
+  static void specialized_global_fence();
+  static void general_global_fence();
+
   template<typename FieldType, ScopedFenceType FenceType>
   static void ordered_store(volatile FieldType* p, FieldType v);
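The collector-side calls added above rest on general_global_fence() in orderAccess.cpp: write to the current thread's private serialization page, then flip its protection to read-only and back. Changing the protection of a dirtied, mapped page forces the kernel to synchronize the mapping with every CPU that may hold a cached translation (a TLB shootdown), and that cross-CPU interrupt also serializes memory accesses still pending on remote processors; it is the same trick HotSpot already uses with the memory serialize page for thread state transitions. A standalone sketch of the mechanism with plain POSIX calls (illustrative, not the patch's code):

// Illustration of the protect/unprotect cycle behind general_global_fence().
#include <sys/mman.h>
#include <unistd.h>
#include <cassert>

static void global_fence_demo(void* serialization_page) {
  size_t page_size = (size_t)sysconf(_SC_PAGESIZE);

  // Dirty the page so the following protection change has real work to do.
  *(volatile int*)serialization_page = 0;

  // Revoking write access forces the kernel to invalidate the page's
  // translation on every CPU that may hold it; the resulting cross-CPU
  // synchronization acts as a serialization point for pending accesses.
  int rc = ::mprotect(serialization_page, page_size, PROT_READ);
  assert(rc == 0);

  // Restore read/write so the page can be reused by the next fence.
  rc = ::mprotect(serialization_page, page_size, PROT_READ | PROT_WRITE);
  assert(rc == 0);
}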
diff --git a/src/share/vm/runtime/orderAccess.inline.hpp b/src/share/vm/runtime/orderAccess.inline.hpp
--- a/src/share/vm/runtime/orderAccess.inline.hpp
+++ b/src/share/vm/runtime/orderAccess.inline.hpp
@@ -138,6 +138,8 @@
 inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { specialized_release_store_fence(p, v); }
 inline void OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { specialized_release_store_fence((volatile intptr_t*)p, (intptr_t)v); }
 
+inline void OrderAccess::global_fence() { specialized_global_fence(); }
+
 // The following methods can be specialized using simple template specialization
 // in the platform specific files for optimization purposes. Otherwise the
 // generalized variant is used.
@@ -145,6 +147,12 @@
 template<typename T> inline void OrderAccess::specialized_release_store      (volatile T* p, T v) { ordered_store<T, RELEASE_X>(p, v); }
 template<typename T> inline void OrderAccess::specialized_release_store_fence(volatile T* p, T v) { ordered_store<T, RELEASE_X_FENCE>(p, v); }
 
+template<typename T>
+void OrderAccess::specialized_global_fence() {
+  // no specialization: call general variant
+  general_global_fence();
+}
+
 // Generalized atomic volatile accesses valid in OrderAccess
 // All other types can be expressed in terms of these.
 inline void OrderAccess::store(volatile jbyte* p, jbyte v) { *p = v; }
diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp
--- a/src/share/vm/runtime/os.hpp
+++ b/src/share/vm/runtime/os.hpp
@@ -425,6 +425,8 @@
 
   static void block_on_serialize_page_trap();
 
+  static void *alloc_memory_serialize_page();
+
   // threads
 
   enum ThreadType {
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -271,6 +271,7 @@
            "bug in forced alignment of thread objects");
   }
 #endif // ASSERT
+  _serialization_page = NULL;
 }
 
 // Non-inlined version to be used where thread.inline.hpp shouldn't be included.
@@ -368,6 +369,13 @@
   CHECK_UNHANDLED_OOPS_ONLY(if (CheckUnhandledOops) delete unhandled_oops();)
 }
 
+void *Thread::get_serialization_page() {
+  if (_serialization_page) return _serialization_page;
+
+  _serialization_page = os::alloc_memory_serialize_page();
+  return _serialization_page;
+}
+
 // NOTE: dummy function for assertion purpose.
 void Thread::run() {
   ShouldNotReachHere();
diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp
--- a/src/share/vm/runtime/thread.hpp
+++ b/src/share/vm/runtime/thread.hpp
@@ -635,6 +635,11 @@
   static void muxAcquire(volatile intptr_t * Lock, const char * Name);
   static void muxAcquireW(volatile intptr_t * Lock, ParkEvent * ev);
   static void muxRelease(volatile intptr_t * Lock);
+
+ private:
+  void *_serialization_page;
+ public:
+  void *get_serialization_page();
 };
 
 // Inline implementation of Thread::current()
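Taken together, the intended effect is roughly this: with -XX:+UseCondCardMark the mutator's write barrier tests the card before dirtying it, and the StoreLoad fence that would otherwise be needed between the reference store and the card load can be elided, because CMS precleaning now issues OrderAccess::global_fence() before it acts on dirty cards. A sketch of that barrier shape follows; the names and toy card table (card_for, dirty_card, card_shift) are illustrative placeholders, not HotSpot code:

// Illustrative conditional card-marking barrier with the fence elided.
#include <cstdint>
#include <cstddef>

typedef uint8_t CardValue;
static const CardValue dirty_card = 0;
static const CardValue clean_card = 1;

// Toy card table for a fake 1 MiB "heap" at 512-byte card granularity.
static const size_t card_shift = 9;
static char fake_heap[1u << 20];
static volatile CardValue card_table[(1u << 20) >> card_shift];

static volatile CardValue* card_for(void* field_addr) {
  size_t offset = (size_t)((char*)field_addr - fake_heap);
  return &card_table[offset >> card_shift];
}

// Mutator side under UseCondCardMark: store the reference, then dirty the
// card only if it is not already dirty.  No StoreLoad fence between the two:
// the collector compensates by calling OrderAccess::global_fence() before it
// reads card values (see the CMS precleaning hunks above).
static void oop_store_cond_card_mark(void** field, void* new_value) {
  *field = new_value;                       // 1. reference store
  volatile CardValue* card = card_for((void*)field);
  if (*card != dirty_card) {                // 2. conditional card mark
    *card = dirty_card;
  }
}

int main() {
  for (size_t i = 0; i < sizeof(card_table); i++) card_table[i] = clean_card;
  void** slot = (void**)fake_heap;
  oop_store_cond_card_mark(slot, &fake_heap[64]);
  return (*card_for((void*)slot) == dirty_card) ? 0 : 1;
}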