--- old/src/hotspot/share/gc/shared/collectedHeap.cpp 2017-10-31 13:44:20.135672433 -0700
+++ new/src/hotspot/share/gc/shared/collectedHeap.cpp 2017-10-31 13:44:19.807673744 -0700
@@ -291,13 +291,13 @@
 
 void CollectedHeap::sample_allocation(Thread* thread, HeapWord* obj,
-                                      size_t size, size_t fix_sample_rate) {
+                                      size_t size, size_t overflowed_words) {
   // Object is allocated, sample it now.
   HeapMonitoring::object_alloc_do_sample(thread,
                                          reinterpret_cast<oopDesc*>(obj),
                                          size * HeapWordSize);
   // Pick a next sample in this case, we allocated right.
-  thread->tlab().pick_next_sample(fix_sample_rate);
+  thread->tlab().pick_next_sample(overflowed_words);
 }
 
 HeapWord* CollectedHeap::allocate_sampled_object(Thread* thread, size_t size) {
@@ -309,7 +309,7 @@
 HeapWord* CollectedHeap::allocate_from_tlab_slow(Klass* klass, Thread* thread,
                                                  size_t size) {
   // In case the tlab changes, remember if this one wanted a sample.
-  bool should_sample = thread->tlab().should_sample() && HeapMonitoring::enabled();
+  bool should_sample = HeapMonitoring::enabled() && thread->tlab().should_sample();
 
   HeapWord* obj = NULL;
   if (should_sample) {
@@ -320,10 +320,10 @@
     // If we did allocate in this tlab, sample it. Otherwise, we wait for the
     // new tlab's first allocation at the end of this method.
     if (obj != NULL) {
-      // Fix sample rate by removing the extra bytes allocated in this last
+      // Fix sample rate by removing the extra words allocated in this last
      // sample.
-      size_t fix_sample_rate = thread->tlab().top() - tlab_old_end;
-      sample_allocation(thread, obj, size, fix_sample_rate);
+      size_t overflowed_words = pointer_delta(thread->tlab().top(), tlab_old_end);
+      sample_allocation(thread, obj, size, overflowed_words);
       return obj;
     }
   }
--- old/src/hotspot/share/gc/shared/collectedHeap.hpp 2017-10-31 13:44:21.019668900 -0700
+++ new/src/hotspot/share/gc/shared/collectedHeap.hpp 2017-10-31 13:44:20.679670260 -0700
@@ -146,8 +146,10 @@
   static HeapWord* allocate_from_tlab_slow(Klass* klass, Thread* thread, size_t size);
 
   // Sample the allocation via HeapMonitoring.
+  // overflowed_words is the number of HeapWords allocated past the sampling
+  // boundary; it is used to correct the next sampling interval.
   static void sample_allocation(Thread* thread, HeapWord* obj, size_t size,
-                                size_t fix_sample_rate = 0);
+                                size_t overflowed_words = 0);
   // Try to allocate the object we want to sample in this tlab; returns NULL
   // if the allocation fails.
   static HeapWord* allocate_sampled_object(Thread* thread, size_t size);
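A note on the correction above: when the sampled allocation runs a few words past the sampling point (the fast-path end was set short of the real TLAB end), that overshoot has already been "spent" on the current sample, so it is deducted from the next interval to keep the average sampling rate unbiased. A minimal standalone sketch of the arithmetic, assuming a byte-based countdown and 8-byte heap words; the names are illustrative, not HotSpot's:

    #include <cstddef>

    // Illustrative: HotSpot's HeapWordSize is 8 on 64-bit platforms.
    static const size_t kHeapWordSize = 8;

    // Deduct the words allocated past the sampling boundary from the next
    // sampling interval, mirroring pick_next_sample()'s guard against
    // underflowing the counter.
    size_t corrected_interval(size_t bytes_until_sample,
                              size_t overflowed_words) {
      size_t overflowed_bytes = overflowed_words * kHeapWordSize;
      if (bytes_until_sample > overflowed_bytes) {
        return bytes_until_sample - overflowed_bytes;
      }
      return bytes_until_sample;  // Overshoot too large: skip the fix.
    }

This is also why pointer_delta() fits in the hunk above: it yields the distance from tlab_old_end to top() directly in HeapWords, the unit pick_next_sample() expects.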
--- old/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp 2017-10-31 13:44:21.871665496 -0700
+++ new/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp 2017-10-31 13:44:21.539666823 -0700
@@ -128,7 +128,7 @@
   }
   assert(!(retire || ZeroTLAB) ||
          (start() == NULL && end() == NULL && top() == NULL &&
-          actual_end() == NULL && slow_path_end() == NULL),
+          _actual_end == NULL && _slow_path_end == NULL),
          "TLAB must be reset");
 }
 
@@ -179,7 +179,7 @@
   // actual refill.
   size_t old_bytes_until_sample = 0;
   if (_number_of_refills > 1) {
-    old_bytes_until_sample = bytes_until_sample();
+    old_bytes_until_sample = _bytes_until_sample;
   }
 
   initialize(start, top, start + new_size - alignment_reserve());
@@ -326,8 +326,8 @@
 }
 
 void ThreadLocalAllocBuffer::set_sample_end() {
-  size_t heap_words_remaining = _end - _top;
-  size_t bytes_left = bytes_until_sample();
+  size_t heap_words_remaining = pointer_delta(_end, _top);
+  size_t bytes_left = _bytes_until_sample;
   size_t words_until_sample = bytes_left / HeapWordSize;
 
   if (heap_words_remaining > words_until_sample) {
@@ -341,29 +341,30 @@
   }
 }
 
-void ThreadLocalAllocBuffer::pick_next_sample(size_t diff) {
+void ThreadLocalAllocBuffer::pick_next_sample(size_t overflowed_words) {
   if (!HeapMonitoring::enabled()) {
     return;
   }
 
-  if (bytes_until_sample() == 0) {
-    HeapMonitoring::pick_next_sample(bytes_until_sample_addr());
+  if (_bytes_until_sample == 0) {
+    HeapMonitoring::pick_next_sample(&_bytes_until_sample);
   }
 
-  if (diff > 0) {
+  if (overflowed_words > 0) {
     // Try to correct sample size by removing extra space from last allocation.
-    if (bytes_until_sample() > diff * HeapWordSize) {
-      set_bytes_until_sample(bytes_until_sample() - diff * HeapWordSize);
+    if (_bytes_until_sample > overflowed_words * HeapWordSize) {
+      set_bytes_until_sample(_bytes_until_sample - overflowed_words * HeapWordSize);
     }
   }
 
   set_sample_end();
 
   log_trace(gc, tlab)("TLAB picked next sample: thread: " INTPTR_FORMAT " [id: %2d]"
-                      " start: %p top: %p end: %p actual_end: %p slow_path_end: %p",
+                      " start: " INTPTR_FORMAT " top: " INTPTR_FORMAT " end: " INTPTR_FORMAT " actual_end:"
+                      INTPTR_FORMAT " slow_path_end: " INTPTR_FORMAT,
                       p2i(myThread()), myThread()->osthread()->thread_id(),
-                      start(), top(), end(),
-                      actual_end(), slow_path_end());
+                      p2i(start()), p2i(top()), p2i(end()),
+                      p2i(_actual_end), p2i(_slow_path_end));
 }
 
 Thread* ThreadLocalAllocBuffer::myThread() {
@@ -390,8 +391,8 @@
   }
 
   size_t size_in_bytes = size * HeapWordSize;
-  if (bytes_until_sample() > size_in_bytes) {
-    set_bytes_until_sample(bytes_until_sample() - size_in_bytes);
+  if (_bytes_until_sample > size_in_bytes) {
+    set_bytes_until_sample(_bytes_until_sample - size_in_bytes);
   } else {
     // Technically this is not exactly right: we should probably remember how
     // many bytes we went negative by, and reduce the next sample interval.
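The pointer_delta() change in set_sample_end() replaces a raw HeapWord* subtraction, which yields a signed ptrdiff_t, with HotSpot's checked unsigned word distance. Since the hunk only shows the top of the function, here is a hedged standalone sketch of the boundary logic it implements; the field names mirror the patch, but the clamping body is inferred from context rather than quoted:

    #include <cassert>
    #include <cstddef>

    static const size_t kHeapWordSize = 8;  // 64-bit assumption

    // Stand-in for HotSpot's pointer_delta(): word distance with an
    // ordering check (raw subtraction of unordered pointers underflows).
    inline size_t word_delta(const char* left, const char* right) {
      assert(left >= right && "pointer_delta underflow");
      return (size_t)(left - right) / kHeapWordSize;
    }

    // Sketch: pull the fast-path end in so the allocation that crosses
    // the sampling point falls into the slow path, where it can be
    // sampled. _actual_end (kept elsewhere) still records the real end.
    struct TlabSketch {
      char*  _top;
      char*  _end;  // end seen by the inlined fast-path allocator
      size_t _bytes_until_sample;

      void set_sample_end() {
        size_t words_remaining    = word_delta(_end, _top);
        size_t words_until_sample = _bytes_until_sample / kHeapWordSize;
        if (words_remaining > words_until_sample) {
          _end = _top + words_until_sample * kHeapWordSize;
        }
      }
    };

Keeping the sampled boundary in _end while _actual_end holds the true limit is the design point: the compiled fast path keeps comparing against _end and needs no extra branch for sampling.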
--- old/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp 2017-10-31 13:44:22.759661948 -0700
+++ new/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp 2017-10-31 13:44:22.403663369 -0700
@@ -93,6 +93,10 @@
 
   size_t remaining() { return end() == NULL ? 0 : pointer_delta(hard_end(), top()); }
 
+  // Obtain the actual end of the TLAB.
+  HeapWord* hard_end();
+  void set_sample_end();
+
   // Make parsable and release it.
   void reset();
@@ -130,9 +134,6 @@
   HeapWord* start() const              { return _start; }
   HeapWord* end() const                { return _end; }
-  HeapWord* slow_path_end() const      { return _slow_path_end; }
-  HeapWord* actual_end() const         { return _actual_end; }
-  HeapWord* hard_end();
   HeapWord* top() const                { return _top; }
   HeapWord* pf_top() const             { return _pf_top; }
   size_t desired_size() const          { return _desired_size; }
@@ -180,19 +181,15 @@
   void initialize();
 
   void pick_next_sample(size_t diff = 0);
-  void set_sample_end();
   void set_back_actual_end();
   void handle_sample(Thread* thread, HeapWord* result, size_t size);
-  size_t bytes_until_sample() { return _bytes_until_sample; }
-  size_t *bytes_until_sample_addr() { return &_bytes_until_sample; }
-  bool should_sample() { return bytes_until_sample() == 0; }
+  bool should_sample() { return _bytes_until_sample == 0; }
 
   static size_t refill_waste_limit_increment() { return TLABWasteIncrement; }
 
   // Code generation support
   static ByteSize start_offset()      { return byte_offset_of(ThreadLocalAllocBuffer, _start); }
   static ByteSize end_offset()        { return byte_offset_of(ThreadLocalAllocBuffer, _end); }
-  static ByteSize actual_end_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _actual_end); }
   static ByteSize top_offset()        { return byte_offset_of(ThreadLocalAllocBuffer, _top); }
   static ByteSize pf_top_offset()     { return byte_offset_of(ThreadLocalAllocBuffer, _pf_top); }
   static ByteSize size_offset()       { return byte_offset_of(ThreadLocalAllocBuffer, _desired_size); }
--- old/src/hotspot/share/runtime/heapMonitoring.cpp 2017-10-31 13:44:23.647658399 -0700
+++ new/src/hotspot/share/runtime/heapMonitoring.cpp 2017-10-31 13:44:23.295659805 -0700
@@ -209,7 +209,7 @@
   }
 
   void initialize(int max_storage) {
-    MutexLocker mu(HeapMonitor_lock);
+    MutexLocker mu(HeapMonitorStorage_lock);
     free_storage();
     allocate_storage(max_storage);
     memset(&_stats, 0, sizeof(_stats));
@@ -295,7 +295,6 @@
 // Statics for Sampler
 double HeapMonitoring::_log_table[1 << FastLogNumBits];
 bool HeapMonitoring::_enabled;
-AlwaysTrueClosure HeapMonitoring::_always_true;
 jint HeapMonitoring::_monitoring_rate;
 
 // Cheap random number generator
@@ -382,7 +381,7 @@
 }
 
 void StackTraceStorage::add_trace(jvmtiStackTrace *trace, oop o) {
-  MutexLocker mu(HeapMonitor_lock);
+  MutexLocker mu(HeapMonitorStorage_lock);
   StackTraceData new_data(trace, o);
   _stats.sample_count++;
   _stats.stack_depth_accumulation += trace->frame_count;
@@ -391,7 +390,7 @@
 
 void StackTraceStorage::weak_oops_do(BoolObjectClosure *is_alive,
                                      OopClosure *f) {
-  MutexLocker mu(HeapMonitor_lock);
+  MutexLocker mu(HeapMonitorStorage_lock);
   size_t count = 0;
   if (initialized()) {
     int len = _allocated_traces->length();
@@ -476,7 +475,7 @@
 
 void StackTraceStorage::copy_stack_traces(const StackTraceDataCopier &copier,
                                           jvmtiStackTraces *traces) {
-  MutexLocker mu(HeapMonitor_lock);
+  MutexLocker mu(HeapMonitorStorage_lock);
   int len = copier.size();
 
   // Create a new array to store the StackTraceData objects.
@@ -566,6 +565,7 @@
 
 void HeapMonitoring::initialize_profiling(jint monitoring_rate,
                                           jint max_gc_storage) {
+  MutexLocker mu(HeapMonitor_lock);
   // Ignore if already enabled.
   if (_enabled) {
     return;
@@ -592,6 +592,7 @@
 }
 
 void HeapMonitoring::stop_profiling() {
+  MutexLocker mu(HeapMonitor_lock);
   _enabled = false;
 }
 
@@ -613,10 +614,10 @@
   // 5194297183973780480 bytes. In this case,
   // for sample_parameter = 1<<19, max possible step is
   // 9448372 bytes (24 bits).
-  const uint64_t prng_mod_power = 48;  // Number of bits in prng
+  const uint64_t PrngModPower = 48;  // Number of bits in prng
   // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
   // under piii debug for some binaries.
-  double q = static_cast<uint32_t>(_rnd >> (prng_mod_power - 26)) + 1.0;
+  double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
   // Put the computed p-value through the CDF of a geometric.
   // For faster performance (save ~1/20th exec time), replace
   // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
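For readers of the sampler hunks above: next_random() steps a 48-bit linear congruential generator (the drand48/java.util.Random constants 0x5DEECE66D and 0xB), and pick_next_sample() inverts a geometric CDF so that sample points land uniformly per allocated byte. A standalone sketch of the computation the comments describe, using libm's log2 in place of the FastLog2 table; the function names and the final clamping are illustrative:

    #include <cmath>
    #include <cstddef>
    #include <cstdint>

    // 48-bit LCG step, as in next_random() above.
    uint64_t lcg_step(uint64_t rnd) {
      const uint64_t mult = 0x5DEECE66DULL;
      const uint64_t add  = 0xB;
      const uint64_t mask = (uint64_t(1) << 48) - 1;
      return (mult * rnd + add) & mask;
    }

    // Draw the next sampling interval in bytes for a mean of 'rate'.
    // q is uniform in [1, 2^26], taken from the PRNG's top 26 bits.
    size_t next_interval(uint64_t rnd, size_t rate) {
      double q = (double)(uint32_t)(rnd >> (48 - 26)) + 1.0;
      double interval = (std::log2(q) - 26) * (-std::log(2.0) * (double)rate);
      return interval < 1.0 ? (size_t)1 : (size_t)interval;
    }

Since q / 2^26 is uniform in (0, 1], the quantity (log2(q) - 26) * -ln(2) = -ln(q / 2^26) is exponentially distributed with mean 1, so the returned interval averages 'rate' bytes, which is the geometric behavior the comment block describes.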
--- old/src/hotspot/share/runtime/heapMonitoring.hpp 2017-10-31 13:44:24.563654738 -0700
+++ new/src/hotspot/share/runtime/heapMonitoring.hpp 2017-10-31 13:44:24.195656209 -0700
@@ -40,7 +40,6 @@
   static const int FastLogNumBits = 10;
   static const int FastLogMask = (1 << FastLogNumBits) - 1;
   static double _log_table[1<<FastLogNumBits];  // Constant
-  static AlwaysTrueClosure _always_true;
   static jint _monitoring_rate;
@@ ... @@
-    const uint64_t prng_mod_mask =
-        ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+    const uint64_t prng_mod_mask = right_n_bits(prng_mod_power);
     return (PrngMult * rnd + prng_add) & prng_mod_mask;
   }
 
@@ -96,6 +94,7 @@
   // but which no longer have other references in the heap.
   static void weak_oops_do(BoolObjectClosure* is_alive, OopClosure *f);
   static void weak_oops_do(OopClosure* oop_closure) {
+    AlwaysTrueClosure _always_true;
     weak_oops_do(&_always_true, oop_closure);
   }
--- old/src/hotspot/share/runtime/mutexLocker.cpp 2017-10-31 13:44:25.459651157 -0700
+++ new/src/hotspot/share/runtime/mutexLocker.cpp 2017-10-31 13:44:25.099652597 -0700
@@ -125,6 +125,7 @@
 
 Monitor* RedefineClasses_lock        = NULL;
 Monitor* HeapMonitor_lock            = NULL;
+Monitor* HeapMonitorStorage_lock     = NULL;
 
 #if INCLUDE_TRACE
 Mutex*   JfrStacktrace_lock          = NULL;
@@ -276,7 +277,8 @@
   def(PeriodicTask_lock            , PaddedMonitor, nonleaf+5, true,  Monitor::_safepoint_check_sometimes);
   def(RedefineClasses_lock         , PaddedMonitor, nonleaf+5, true,  Monitor::_safepoint_check_always);
-  def(HeapMonitor_lock             , PaddedMonitor, leaf,      true,  Monitor::_safepoint_check_always);
+  def(HeapMonitorStorage_lock      , PaddedMonitor, nonleaf,   true,  Monitor::_safepoint_check_always);
+  def(HeapMonitor_lock             , PaddedMonitor, nonleaf+1, true,  Monitor::_safepoint_check_always);
 
   if (WhiteBoxAPI) {
     def(Compilation_lock           , PaddedMonitor, leaf,      false, Monitor::_safepoint_check_never);
--- old/src/hotspot/share/runtime/mutexLocker.hpp 2017-10-31 13:44:26.307647769 -0700
+++ new/src/hotspot/share/runtime/mutexLocker.hpp 2017-10-31 13:44:25.959649160 -0700
@@ -123,7 +123,8 @@
 extern Monitor* PeriodicTask_lock;        // protects the periodic task structure
 extern Monitor* RedefineClasses_lock;     // locks classes from parallel redefinition
-extern Monitor* HeapMonitor_lock;         // protects internal storage in HeapMonitoring
+extern Monitor* HeapMonitor_lock;         // protects HeapMonitoring initialize and stop calls
+extern Monitor* HeapMonitorStorage_lock;  // protects internal storage in HeapMonitoring
 
 #if INCLUDE_TRACE
 extern Mutex*   JfrStacktrace_lock;       // used to guard access to the JFR stacktrace table
--- old/src/hotspot/share/runtime/thread.hpp 2017-10-31 13:44:27.179644284 -0700
+++ new/src/hotspot/share/runtime/thread.hpp 2017-10-31 13:44:26.827645690 -0700
@@ -622,7 +622,6 @@
 
   TLAB_FIELD_OFFSET(start)
   TLAB_FIELD_OFFSET(end)
-  TLAB_FIELD_OFFSET(actual_end)
   TLAB_FIELD_OFFSET(top)
   TLAB_FIELD_OFFSET(pf_top)
   TLAB_FIELD_OFFSET(size)                   // desired_size
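Two closing notes on the hunks above. First, the mask rewrite in next_random(): right_n_bits(n) is HotSpot's idiom for a mask of the low n bits, which for 0 < n < 64 equals the old double-negation spelling. A quick standalone check; the second expression encodes the assumed right_n_bits() semantics rather than HotSpot's actual macro:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned n = 1; n < 64; n++) {
        // Old spelling: flood the high bits, then invert.
        uint64_t old_mask = ~((~static_cast<uint64_t>(0)) << n);
        // Assumed right_n_bits(n) semantics: a low-n-bits mask.
        uint64_t new_mask = (static_cast<uint64_t>(1) << n) - 1;
        assert(old_mask == new_mask);
      }
      return 0;
    }

Second, the new ranks in mutexLocker.cpp are consistent with HotSpot's rule that a thread may only acquire a lock of strictly lower rank than any lock it already holds: initialize_profiling() now takes HeapMonitor_lock (nonleaf+1) and can still call into StackTraceStorage code that takes HeapMonitorStorage_lock (nonleaf), a nesting that would have self-deadlocked with the old single lock.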