--- /dev/null	2017-05-01 09:42:45.355096588 -0700
+++ new/src/share/vm/runtime/heapMonitoring.cpp	2017-05-22 08:52:15.207034323 -0700
@@ -0,0 +1,748 @@
+/*
+ * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "prims/forte.hpp"
+#include "runtime/heapMonitoring.hpp"
+
+// TODO: add capability to get stacktraces in forte.cpp
+
+const int kMaxStackDepth = 64;
+
+// Internal data structure representing traces.
+struct StackTraceData : CHeapObj<mtInternal> {
+  ASGCT_CallTrace *trace;
+  intx byte_size;
+  jlong thread_id;
+  oop obj;
+  int references;
+
+  StackTraceData(ASGCT_CallTrace *t, intx size, jlong tid, oop o) :
+      trace(t), byte_size(size), thread_id(tid), obj(o), references(0) {}
+
+  StackTraceData() :
+      trace(NULL), byte_size(0), thread_id(0), obj(NULL), references(0) {}
+
+  // StackTraceData instances are shared between several lists, so cleanup is
+  // done by hand here rather than in a destructor.  There are cases where the
+  // struct lives on the stack while holding heap data that must not be freed.
+  static void FreeData(StackTraceData *data) {
+    if (data->trace != NULL) {
+      FREE_C_HEAP_ARRAY(ASGCT_CallFrame, data->trace->frames);
+      FREE_C_HEAP_OBJ(data->trace);
+    }
+    delete data;
+  }
+};
+
+// RAII class that acquires / releases a lock.
+class MuxLocker {
+ private:
+  volatile intptr_t *_lock;
+  const char *_name;
+ public:
+  MuxLocker(volatile intptr_t *lock, const char *name) :
+      _lock(lock),
+      _name(name) {
+    Thread::muxAcquire(lock, name);
+  }
+  ~MuxLocker() {
+    Thread::muxRelease(_lock);
+  }
+};
+
+// Fixed-size buffer for holding garbage traces.
+class GarbageTracesBuffer : public CHeapObj<mtInternal> {
+ public:
+  GarbageTracesBuffer(uint32_t size) : _size(size) {
+    _garbage_traces = NEW_C_HEAP_ARRAY(StackTraceData*,
+                                       size,
+                                       mtInternal);
+    memset(_garbage_traces, 0, sizeof(StackTraceData*) * size);
+  }
+
+  virtual ~GarbageTracesBuffer() {
+    FREE_C_HEAP_ARRAY(StackTraceData*, _garbage_traces);
+  }
+
+  StackTraceData** get_traces() const {
+    return _garbage_traces;
+  }
+
+  bool store_trace(StackTraceData *trace) {
+    uint32_t index;
+    if (!select_replacement(&index)) {
+      return false;
+    }
+
+    StackTraceData *old_data = _garbage_traces[index];
+
+    if (old_data != NULL) {
+      old_data->references--;
+
+      if (old_data->references == 0) {
+        StackTraceData::FreeData(old_data);
+      }
+    }
+
+    trace->references++;
+    _garbage_traces[index] = trace;
+    return true;
+  }
+
+  uint32_t size() const {
+    return _size;
+  }
+
+ protected:
+  // Subclasses select the trace to replace.  Returns false if no replacement
+  // is to happen, otherwise stores the index of the trace to replace in
+  // *index.
+  virtual bool select_replacement(uint32_t *index) = 0;
+
+  const uint32_t _size;
+
+ private:
+  // The current garbage traces: a fixed-size ring buffer.
+  StackTraceData **_garbage_traces;
+};
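+
+// Note: a StackTraceData can be referenced by both of the garbage trace
+// buffers defined below (most recent and frequent) at the same time, which
+// is why it carries a reference count instead of a single owner.  For
+// example, a trace accepted by both buffers has references == 2; each
+// eviction (see store_trace above) drops the count, and the data is only
+// released through StackTraceData::FreeData once the count reaches zero.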
+
+// Keep a statistical sample of traces over the lifetime of the server.
+// When the buffer is full, replace a random entry with probability
+// 1/samples_seen.  This strategy tends towards preserving the most
+// frequently occurring traces over time.
+class FrequentGarbageTraces : public GarbageTracesBuffer {
+ public:
+  FrequentGarbageTraces(int size)
+      : GarbageTracesBuffer(size),
+        _garbage_traces_pos(0),
+        _samples_seen(0) {
+  }
+
+  virtual ~FrequentGarbageTraces() {
+  }
+
+  virtual bool select_replacement(uint32_t* index) {
+    ++_samples_seen;
+
+    if (_garbage_traces_pos < _size) {
+      *index = _garbage_traces_pos++;
+      return true;
+    }
+
+    uint64_t random_uint64 =
+        (static_cast<uint64_t>(::random()) << 32) | ::random();
+
+    uint32_t random_index = random_uint64 % _samples_seen;
+    if (random_index < _size) {
+      *index = random_index;
+      return true;
+    }
+
+    return false;
+  }
+
+ private:
+  // The current position in the buffer as we initially fill it.
+  uint32_t _garbage_traces_pos;
+
+  uint64_t _samples_seen;
+};
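+
+// The replacement policy above is a form of reservoir sampling: once the
+// buffer is full, the k-th trace seen replaces a uniformly chosen slot with
+// probability _size / k, so at any point every trace seen so far has had the
+// same chance of being resident.  For example, with _size == 100, the
+// 10,000th sample displaces an existing entry with probability 1%, and a
+// call stack responsible for 10% of all sampled garbage is expected to
+// occupy roughly 10% of the buffer.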
+
+// Store the most recent garbage traces.
+class MostRecentGarbageTraces : public GarbageTracesBuffer {
+ public:
+  MostRecentGarbageTraces(int size)
+      : GarbageTracesBuffer(size),
+        _garbage_traces_pos(0) {
+  }
+
+  virtual ~MostRecentGarbageTraces() {
+  }
+
+  virtual bool select_replacement(uint32_t* index) {
+    *index = _garbage_traces_pos;
+
+    _garbage_traces_pos =
+        (_garbage_traces_pos + 1) % _size;
+
+    return true;
+  }
+
+ private:
+  // The current position in the buffer.
+  uint32_t _garbage_traces_pos;
+};
+
+// Each object that we profile is stored as a trace with the thread_id.
+class StackTraceStorage {
+ public:
+  // Adds a trace to the list of traces we are maintaining.  trace is the
+  // stacktrace and thread is the thread that did the allocation.
+  void add_trace(ASGCT_CallTrace *trace, intx byte_size, Thread *thread, oop o);
+
+  // Called by the client to retrieve the list of live stack traces.
+  // Passes a jvmtiStackTraces which will get mutated.
+  void get_all_stack_traces(jvmtiStackTraces *traces);
+
+  // Called by the client to retrieve the list of recent garbage stack
+  // traces.  Passes a jvmtiStackTraces which will get mutated.
+  void get_garbage_stack_traces(jvmtiStackTraces *traces);
+
+  // Called by the client to retrieve the list of frequent garbage stack
+  // traces.  Passes a jvmtiStackTraces which will get mutated.
+  void get_frequent_garbage_stack_traces(jvmtiStackTraces *traces);
+
+  // Executes whenever weak references are traversed.  is_alive tells
+  // you if the given oop is still reachable and live.
+  void do_weak_oops(BoolObjectClosure* is_alive,
+                    OopClosure *f,
+                    VoidClosure* complete_gc);
+
+  ~StackTraceStorage();
+  StackTraceStorage();
+
+  // The global storage.  Not a global static because
+  // StackTraceStorage isn't available at module-loading time.
+  static StackTraceStorage* storage() {
+    static StackTraceStorage storage;
+    return &storage;
+  }
+
+  bool IsInitialized() {
+    return _initialized;
+  }
+
+  // Static method to set the storage in place at initialization.
+  static void InitializeStackTraceStorage(int max_storage) {
+    StackTraceStorage *storage = StackTraceStorage::storage();
+    storage->InitializeStorage(max_storage);
+  }
+
+  bool initialized() { return _initialized; }
+  volatile bool *initialized_address() { return &_initialized; }
+
+ private:
+  // Protects the traces currently sampled (below).
+  volatile intptr_t _allocated_traces_lock[1];
+
+  // The traces currently sampled.
+  GrowableArray<StackTraceData> *_allocated_traces;
+
+  // Recent garbage traces.
+  MostRecentGarbageTraces *_recent_garbage_traces;
+
+  // Frequent garbage traces.
+  FrequentGarbageTraces *_frequent_garbage_traces;
+
+  // Maximum size of the allocation.
+  size_t _allocated_traces_size;
+
+  // Maximum amount of storage provided by the JVMTI call initialize_profiling.
+  int _max_storage;
+
+  volatile bool _initialized;
+
+  // Support functions and classes for copying data to the external
+  // world.
+  class StackTraceDataCopier {
+   public:
+    virtual int size() const = 0;
+    virtual const StackTraceData *get(uint32_t i) const = 0;
+  };
+
+  class LiveStackTraceDataCopier : public StackTraceDataCopier {
+   public:
+    LiveStackTraceDataCopier(GrowableArray<StackTraceData> *data) :
+        _data(data) {}
+    int size() const { return _data->length(); }
+    const StackTraceData *get(uint32_t i) const { return _data->adr_at(i); }
+
+   private:
+    GrowableArray<StackTraceData> *_data;
+  };
+
+  class GarbageStackTraceDataCopier : public StackTraceDataCopier {
+   public:
+    GarbageStackTraceDataCopier(StackTraceData **data, int size) :
+        _data(data), _size(size) {}
+    int size() const { return _size; }
+    const StackTraceData *get(uint32_t i) const { return _data[i]; }
+
+   private:
+    StackTraceData **_data;
+    int _size;
+  };
+
+  // Instance initialization.
+  void InitializeStorage(int max_storage);
+
+  // Copies from StackTraceData to jvmtiStackTrace.
+  bool deep_copy(jvmtiStackTrace *to, const StackTraceData *from);
+
+  // Creates a deep copy of the list of StackTraceData.
+  void copy_stack_traces(const StackTraceDataCopier &copier,
+                         jvmtiStackTraces *traces);
+
+  void store_garbage_trace(const StackTraceData &trace);
+
+  void FreeGarbage();
+};
+
+// Statics for Sampler
+double HeapMonitoring::_log_table[1 << kFastlogNumBits];
+
+jint HeapMonitoring::_monitoring_rate;
+
+// Cheap random number generator state.
+uint64_t HeapMonitoring::_rnd;
+
+StackTraceStorage::StackTraceStorage() :
+    _allocated_traces(NULL),
+    _recent_garbage_traces(NULL),
+    _frequent_garbage_traces(NULL),
+    _max_storage(0),
+    _initialized(false) {
+  _allocated_traces_lock[0] = 0;
+}
+
+void StackTraceStorage::FreeGarbage() {
+  StackTraceData **recent_garbage = NULL;
+  uint32_t recent_size = 0;
+
+  StackTraceData **frequent_garbage = NULL;
+  uint32_t frequent_size = 0;
+
+  if (_recent_garbage_traces != NULL) {
+    recent_garbage = _recent_garbage_traces->get_traces();
+    recent_size = _recent_garbage_traces->size();
+  }
+
+  if (_frequent_garbage_traces != NULL) {
+    frequent_garbage = _frequent_garbage_traces->get_traces();
+    frequent_size = _frequent_garbage_traces->size();
+  }
+
+  // Simple solution since this happens at exit.
+  // Go through the recent traces and free any that are no longer referenced.
+  for (uint32_t i = 0; i < recent_size; i++) {
+    StackTraceData *trace = recent_garbage[i];
+    if (trace != NULL) {
+      trace->references--;
+
+      if (trace->references == 0) {
+        StackTraceData::FreeData(trace);
+      }
+    }
+  }
+
+  // Then do the same for the frequent traces.
+  for (uint32_t i = 0; i < frequent_size; i++) {
+    StackTraceData *trace = frequent_garbage[i];
+    if (trace != NULL) {
+      trace->references--;
+
+      if (trace->references == 0) {
+        StackTraceData::FreeData(trace);
+      }
+    }
+  }
+}
+
+StackTraceStorage::~StackTraceStorage() {
+  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::Destructor");
+  delete _allocated_traces;
+
+  FreeGarbage();
+  delete _recent_garbage_traces;
+  delete _frequent_garbage_traces;
+  _initialized = false;
+}
+
+void StackTraceStorage::InitializeStorage(int max_storage) {
+  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::InitializeStorage");
+
+  // In case multiple threads were waiting on the lock and then got through
+  // one by one.
+  if (_initialized) {
+    return;
+  }
+
+  _allocated_traces = new (ResourceObj::C_HEAP, mtInternal)
+      GrowableArray<StackTraceData>(128, true);
+
+  _recent_garbage_traces = new MostRecentGarbageTraces(max_storage);
+  _frequent_garbage_traces = new FrequentGarbageTraces(max_storage);
+
+  _max_storage = max_storage;
+  _initialized = true;
+}
+
+void StackTraceStorage::add_trace(ASGCT_CallTrace *trace,
+                                  intx byte_size,
+                                  Thread *thread,
+                                  oop o) {
+  StackTraceData new_data(trace, byte_size,
+                          SharedRuntime::get_java_tid(thread), o);
+
+  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::add_trace");
+  _allocated_traces->append(new_data);
+}
+
+void StackTraceStorage::do_weak_oops(BoolObjectClosure *is_alive,
+                                     OopClosure *f,
+                                     VoidClosure *complete_gc) {
+  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::do_weak_oops");
+
+  if (IsInitialized()) {
+    int len = _allocated_traces->length();
+
+    // Compact the oop traces.  Moves the live oops to the beginning of the
+    // growable array, potentially overwriting the dead ones.
+    int curr_pos = 0;
+    for (int i = 0; i < len; i++) {
+      StackTraceData &trace = _allocated_traces->at(i);
+      oop value = trace.obj;
+      if ((value != NULL && Universe::heap()->is_in_reserved(value)) &&
+          (is_alive == NULL || is_alive->do_object_b(value))) {
+        // Update the oop to point to the new object if it is still alive.
+        f->do_oop(&(trace.obj));
+
+        // Copy the old trace, if it is still live.
+        _allocated_traces->at_put(curr_pos++, trace);
+      } else {
+        // If the old trace is no longer live, add it to the list of
+        // recently collected garbage.
+        store_garbage_trace(trace);
+      }
+    }
+
+    // Zero out the remaining array elements.  Even though the call to
+    // trunc_to below truncates these values, zeroing them out is good
+    // practice.
+    StackTraceData zero_trace;
+    for (int i = curr_pos; i < len; i++) {
+      _allocated_traces->at_put(i, zero_trace);
+    }
+
+    // Set the array's length to the number of live elements.
+    _allocated_traces->trunc_to(curr_pos);
+    if (complete_gc != NULL) {
+      complete_gc->do_void();
+    }
+  }
+}
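+
+// For example, if _allocated_traces holds [A(live), B(dead), C(live)] when
+// the GC reaches do_weak_oops above, B is handed to store_garbage_trace,
+// A and C are compacted to the front (with their oops updated through the
+// closure), and the array is truncated to length 2.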
+
+bool StackTraceStorage::deep_copy(jvmtiStackTrace *to,
+                                  const StackTraceData *from) {
+  to->thread_id = from->thread_id;
+  to->size = from->byte_size;
+
+  // ASGCT_CallTrace is folded into jvmtiStackTrace.
+  const ASGCT_CallTrace *src = from->trace;
+  to->env_id = src->env_id;
+  to->frame_count = src->num_frames;
+
+  to->frames =
+      NEW_C_HEAP_ARRAY(jvmtiCallFrame, kMaxStackDepth, mtInternal);
+
+  if (to->frames == NULL) {
+    return false;
+  }
+
+  // This assumes that ASGCT_CallFrame and jvmtiCallFrame have the same size
+  // and layout; if they do not, copy the frames field by field instead of
+  // with a single memcpy.
+  if (sizeof(ASGCT_CallFrame) == sizeof(jvmtiCallFrame)) {
+    memcpy(to->frames,
+           src->frames,
+           sizeof(ASGCT_CallFrame) * kMaxStackDepth);
+  } else {
+    for (int i = 0; i < to->frame_count; i++) {
+      // Note we still have the hack where ASGCT piggy-backs the bci on the
+      // lineno field.  This might change soon.
+      to->frames[i].bci = src->frames[i].lineno;
+      to->frames[i].method_id = src->frames[i].method_id;
+    }
+  }
+  return true;
+}
+
+// Called by the outside world; returns a copy of the stack traces
+// (because we could be replacing them as the user handles them).
+// The array is secretly NULL-terminated (to make it easier to reclaim).
+void StackTraceStorage::get_all_stack_traces(jvmtiStackTraces *traces) {
+  LiveStackTraceDataCopier copier(_allocated_traces);
+  copy_stack_traces(copier, traces);
+}
+
+// See comment on get_all_stack_traces.
+void StackTraceStorage::get_garbage_stack_traces(jvmtiStackTraces *traces) {
+  GarbageStackTraceDataCopier copier(_recent_garbage_traces->get_traces(),
+                                     _recent_garbage_traces->size());
+  copy_stack_traces(copier, traces);
+}
+
+// See comment on get_all_stack_traces.
+void StackTraceStorage::get_frequent_garbage_stack_traces(
+    jvmtiStackTraces *traces) {
+  GarbageStackTraceDataCopier copier(_frequent_garbage_traces->get_traces(),
+                                     _frequent_garbage_traces->size());
+  copy_stack_traces(copier, traces);
+}
+
+void StackTraceStorage::copy_stack_traces(const StackTraceDataCopier &copier,
+                                          jvmtiStackTraces *traces) {
+  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::copy_stack_traces");
+  int len = copier.size();
+
+  // Create a new array to store the StackTraceData objects.
+  // + 1 for a NULL at the end.
+  jvmtiStackTrace *t =
+      NEW_C_HEAP_ARRAY(jvmtiStackTrace, len + 1, mtInternal);
+  if (t == NULL) {
+    traces->stack_traces = NULL;
+    traces->trace_count = 0;
+    return;
+  }
+  // +1 to have a NULL at the end of the array.
+  memset(t, 0, (len + 1) * sizeof(*t));
+
+  // Copy the StackTraceData objects into the new array.
+  int trace_count = 0;
+  for (int i = 0; i < len; i++) {
+    const StackTraceData *stack_trace = copier.get(i);
+    if (stack_trace != NULL) {
+      jvmtiStackTrace *to = &t[trace_count];
+      if (!deep_copy(to, stack_trace)) {
+        continue;
+      }
+      trace_count++;
+    }
+  }
+
+  traces->stack_traces = t;
+  traces->trace_count = trace_count;
+}
+
+void StackTraceStorage::store_garbage_trace(const StackTraceData &trace) {
+  StackTraceData *new_trace = new StackTraceData();
+  *new_trace = trace;
+
+  bool accepted = _recent_garbage_traces->store_trace(new_trace);
+
+  // accepted is on the right-hand side of the || so that the second
+  // store_trace call is always evaluated and never short-circuited away.
+  accepted = _frequent_garbage_traces->store_trace(new_trace) || accepted;
+
+  if (!accepted) {
+    // Neither buffer accepted the trace.
+    delete new_trace;
+  }
+}
+
+// Delegate the initialization question to the underlying storage system.
+bool HeapMonitoring::initialized() {
+  return StackTraceStorage::storage()->initialized();
+}
+
+// Delegate to the underlying storage system as well.
+bool *HeapMonitoring::initialized_address() {
+  return
+      const_cast<bool*>(StackTraceStorage::storage()->initialized_address());
+}
+
+void HeapMonitoring::get_live_traces(jvmtiStackTraces *traces) {
+  StackTraceStorage::storage()->get_all_stack_traces(traces);
+}
+
+void HeapMonitoring::get_frequent_garbage_traces(jvmtiStackTraces *traces) {
+  StackTraceStorage::storage()->get_frequent_garbage_stack_traces(traces);
+}
+
+void HeapMonitoring::get_garbage_traces(jvmtiStackTraces *traces) {
+  StackTraceStorage::storage()->get_garbage_stack_traces(traces);
+}
+
+void HeapMonitoring::release_traces(jvmtiStackTraces *trace_info) {
+  jint trace_count = trace_info->trace_count;
+  jvmtiStackTrace *traces = trace_info->stack_traces;
+
+  for (jint i = 0; i < trace_count; i++) {
+    jvmtiStackTrace *current_trace = traces + i;
+    FREE_C_HEAP_ARRAY(jvmtiCallFrame, current_trace->frames);
+  }
+
+  FREE_C_HEAP_ARRAY(jvmtiStackTrace, trace_info->stack_traces);
+  trace_info->trace_count = 0;
+  trace_info->stack_traces = NULL;
+}
+
+// Invoked by the GC to clean up old stack traces and remove old arrays
+// of instrumentation that are still lying around.
+void HeapMonitoring::do_weak_oops(
+    AbstractRefProcTaskExecutor *task_executor,
+    BoolObjectClosure *is_alive,
+    OopClosure *f,
+    VoidClosure *complete_gc) {
+  if (task_executor != NULL) {
+    task_executor->set_single_threaded_mode();
+  }
+  StackTraceStorage::storage()->do_weak_oops(is_alive, f, complete_gc);
+}
+
+void HeapMonitoring::initialize_profiling(jint monitoring_rate,
+                                          jint max_storage) {
+  _monitoring_rate = monitoring_rate;
+
+  StackTraceStorage::InitializeStackTraceStorage(max_storage);
+
+  // Populate the lookup table for fast_log2.
+  // This approximates the log2 curve with a step function.
+  // Steps have height equal to log2 of the mid-point of the step.
+  for (int i = 0; i < (1 << kFastlogNumBits); i++) {
+    double half_way = static_cast<double>(i + 0.5);
+    _log_table[i] = (log(1.0 + half_way / (1 << kFastlogNumBits)) / log(2.0));
+  }
+
+  // Seed the cheap random number generator from the current thread pointer.
+  JavaThread *t = static_cast<JavaThread*>(Thread::current());
+  _rnd = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(t));
+  if (_rnd == 0) {
+    _rnd = 1;
+  }
+  for (int i = 0; i < 20; i++) {
+    _rnd = next_random(_rnd);
+  }
+}
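+
+// As a worked example of the table above (assuming kFastlogNumBits == 10 for
+// illustration; the real constant is defined in the header): the table then
+// has 1024 entries, and entry i holds log2(1.0 + (i + 0.5) / 1024), i.e.
+// log2 evaluated at the midpoint of the i-th step across the mantissa range
+// [1.0, 2.0).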
+
+// Generates a geometric variable with the specified mean (512K by default).
+// This is done by generating a random number between 0 and 1 and applying
+// the inverse cumulative distribution function for an exponential.
+// Specifically: Let m be the inverse of the sample rate, then
+// the probability distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below.
+void HeapMonitoring::pick_next_sample(JavaThread *t) {
+  _rnd = next_random(_rnd);
+  // Take the top 26 bits as the random number.
+  // (This plus a 1<<58 sampling bound gives a max possible step of
+  // 5194297183973780480 bytes.  In this case,
+  // for sample_parameter = 1<<19, max possible step is
+  // 9448372 bytes (24 bits).)
+  const uint64_t prng_mod_power = 48;  // Number of bits in prng
+  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+  // under piii debug for some binaries.
+  double q = static_cast<uint32_t>(_rnd >> (prng_mod_power - 26)) + 1.0;
+  // Put the computed p-value through the inverse CDF of a geometric.
+  // For faster performance (save ~1/20th exec time), replace
+  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705).
+  // The value 26.000705 is used rather than 26 to compensate
+  // for inaccuracies in FastLog2 which otherwise result in a
+  // negative answer.
+  size_t *bytes_until_sample = t->bytes_until_sample();
+  double log_val = (fast_log2(q) - 26);
+  *bytes_until_sample = static_cast<size_t>(
+      (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_monitoring_rate)) + 1);
+}
+
+// Called from the interpreter and C1.
+void HeapMonitoring::object_alloc_unsized(oopDesc* o) {
+  JavaThread *thread = static_cast<JavaThread*>(Thread::current());
+  object_alloc_do_sample(thread, o, o->size() << LogHeapWordSize);
+}
+
+void HeapMonitoring::object_alloc(oopDesc* o, intx byte_size) {
+  JavaThread *thread = static_cast<JavaThread*>(Thread::current());
+  assert(o->size() << LogHeapWordSize == static_cast<intx>(byte_size),
+         "Object size is incorrect.");
+  object_alloc_do_sample(thread, o, byte_size);
+}
+
+// Called directly by C2.
+void HeapMonitoring::object_alloc_do_sample(Thread *t, oopDesc *o,
+                                            intx byte_size) {
+#if defined(X86) || defined(PPC)
+  if (StackTraceStorage::storage()->IsInitialized()) {
+    assert(t->is_Java_thread(), "non-Java thread passed to do_sample");
+    JavaThread *thread = static_cast<JavaThread*>(t);
+
+    pick_next_sample(thread);
+
+    ASGCT_CallTrace *trace = NEW_C_HEAP_OBJ(ASGCT_CallTrace, mtInternal);
+    if (trace == NULL) {
+      return;
+    }
+
+    ASGCT_CallFrame *frames =
+        NEW_C_HEAP_ARRAY(ASGCT_CallFrame, kMaxStackDepth, mtInternal);
+    if (frames == NULL) {
+      FREE_C_HEAP_OBJ(trace);
+      return;
+    }
+
+    trace->frames = frames;
+    trace->env_id = (JavaThread::current())->jni_environment();
+
+    ucontext_t uc;
+    if (!getcontext(&uc)) {
+#if defined(IA32)
+      // On Linux/x86 (but not x64), AsyncGetCallTrace/JVM reads the
+      // stack pointer from the REG_UESP field (as opposed to the
+      // REG_ESP field).  The kernel sets both the REG_UESP and REG_ESP
+      // fields to the correct stack pointer for the ucontexts passed to
+      // signal handlers.  However, getcontext() sets only REG_ESP,
+      // leaving REG_UESP uninitialized.  Since there is no way to
+      // distinguish where a ucontext_t came from, copy from REG_ESP to
+      // REG_UESP so that AGCT will read the right stack pointer.
+      uc.uc_mcontext.gregs[REG_UESP] = uc.uc_mcontext.gregs[REG_ESP];
+#endif
+
+      AsyncGetCallTrace(trace, kMaxStackDepth, &uc);
+
+      if (trace->num_frames > 0) {
+        // Success!
+        StackTraceStorage::storage()->add_trace(trace, byte_size, thread, o);
+        return;
+      }
+    }
+    // Failure!
+    FREE_C_HEAP_ARRAY(ASGCT_CallFrame, trace->frames);
+    FREE_C_HEAP_OBJ(trace);
+    return;
+  } else {
+    // There is something like 64K worth of allocation before the VM
+    // initializes.  This is just in the interests of not slowing down
+    // startup.
+    assert(t->is_Java_thread(), "non-Java thread passed to do_sample");
+    JavaThread *thread = static_cast<JavaThread*>(t);
+    *(thread->bytes_until_sample()) = 65536;
+  }
+#else
+  Unimplemented();
+#endif
+}
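
For reference, here is a minimal standalone sketch of the interval-picking math in pick_next_sample (an editor's illustration of the derivation above, not part of the patch). It inverts the exponential CDF exactly as the VM code does, but uses std::log2 in place of the fast_log2 table and a 48-bit LCG in place of the VM's next_random; the name next_sample_interval and the LCG constants are purely illustrative.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for HeapMonitoring::pick_next_sample.
static size_t next_sample_interval(uint64_t rnd, double mean_rate) {
  const int prng_mod_power = 48;  // number of bits in the PRNG state
  // Take the top 26 bits of the 48-bit state: q is uniform over [1, 2^26].
  double q = static_cast<double>(rnd >> (prng_mod_power - 26)) + 1.0;
  // Inverse CDF of an exponential with the given mean:
  //   x = -mean * ln(q / 2^26) = (log2(q) - 26) * (-ln(2) * mean)
  double log_val = std::log2(q) - 26;
  double interval =
      (log_val > 0.0 ? 0.0 : log_val) * (-std::log(2.0) * mean_rate);
  return static_cast<size_t>(interval) + 1;
}

int main() {
  // Average many intervals; the mean should land near the requested rate
  // (512 KiB here).
  const double rate = 512 * 1024;
  uint64_t state = 0x5DEECE66DULL;
  double sum = 0.0;
  const int n = 100000;
  for (int i = 0; i < n; i++) {
    // 48-bit linear congruential generator (java.util.Random's constants).
    state = (state * 0x5DEECE66DULL + 0xBULL) & ((1ULL << 48) - 1);
    sum += static_cast<double>(next_sample_interval(state, rate));
  }
  std::printf("mean sampling interval: %.0f bytes (target %.0f)\n",
              sum / n, rate);
  return 0;
}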