/*
 * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "memory/universe.hpp"
#include "oops/oop.inline.hpp"
#include "prims/forte.hpp"
#include "runtime/heapMonitoring.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframe.hpp"
#include "utilities/growableArray.hpp"

static const int MaxStackDepth = 64;

// Internal data structure representing traces.
struct StackTraceData : CHeapObj<mtInternal> {
  jvmtiStackTrace *trace;
  oop obj;
  int references;

  StackTraceData(jvmtiStackTrace *t, oop o) : trace(t), obj(o), references(0) {}

  StackTraceData() : trace(NULL), obj(NULL), references(0) {}

  // StackTraceData objects are shared between the various lists, so free
  // them by hand instead of in a destructor: there are cases where the
  // struct lives on the stack but points at heap data that must not be
  // freed.
  static void free_data(StackTraceData *data) {
    if (data->trace != NULL) {
      FREE_C_HEAP_ARRAY(jvmtiFrameInfo, data->trace->frames);
      FREE_C_HEAP_OBJ(data->trace);
    }
    delete data;
  }
};

// Fixed-size buffer for holding garbage traces.
class GarbageTracesBuffer : public CHeapObj<mtInternal> {
 public:
  GarbageTracesBuffer(uint32_t size) : _size(size) {
    _garbage_traces = NEW_C_HEAP_ARRAY(StackTraceData*, size, mtInternal);
    memset(_garbage_traces, 0, sizeof(StackTraceData*) * size);
  }

  virtual ~GarbageTracesBuffer() {
    FREE_C_HEAP_ARRAY(StackTraceData*, _garbage_traces);
  }

  StackTraceData** get_traces() const {
    return _garbage_traces;
  }

  bool store_trace(StackTraceData *trace) {
    uint32_t index;
    if (!select_replacement(&index)) {
      return false;
    }

    StackTraceData *old_data = _garbage_traces[index];
    if (old_data != NULL) {
      old_data->references--;
      if (old_data->references == 0) {
        StackTraceData::free_data(old_data);
      }
    }

    trace->references++;
    _garbage_traces[index] = trace;
    return true;
  }

  uint32_t size() const {
    return _size;
  }

 protected:
  // Subclasses select the trace to replace. Returns false if no replacement
  // is to happen, otherwise stores the index of the trace to replace in
  // *index.
  virtual bool select_replacement(uint32_t *index) = 0;

  const uint32_t _size;

 private:
  // The current garbage traces: a fixed-size ring buffer.
  StackTraceData **_garbage_traces;
};

// Keep a statistical sample of traces over the lifetime of the server.
// When the buffer is full, replace a random entry with probability
// 1/samples_seen. This strategy tends towards preserving the most frequently
// occurring traces over time.
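//
// For illustration (the numbers are an example, not part of the algorithm):
// with a 100-slot buffer, the 10,000th garbage trace observed is kept with
// probability 100/10,000 and overwrites a uniformly chosen slot, i.e. each
// resident entry is replaced with probability 1/10,000. This is reservoir
// sampling; traces that occur frequently are offered many times and so tend
// to stay resident in the buffer.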
class FrequentGarbageTraces : public GarbageTracesBuffer {
 public:
  FrequentGarbageTraces(int size)
      : GarbageTracesBuffer(size),
        _garbage_traces_pos(0),
        _samples_seen(0) {
  }

  virtual ~FrequentGarbageTraces() {
  }

  virtual bool select_replacement(uint32_t* index) {
    ++_samples_seen;

    if (_garbage_traces_pos < _size) {
      *index = _garbage_traces_pos++;
      return true;
    }

    uint64_t random_uint64 =
        (static_cast<uint64_t>(::random()) << 32) | ::random();

    uint32_t random_index = random_uint64 % _samples_seen;
    if (random_index < _size) {
      *index = random_index;
      return true;
    }

    return false;
  }

 private:
  // The current position in the buffer as we initially fill it.
  uint32_t _garbage_traces_pos;

  uint64_t _samples_seen;
};

// Store the most recent garbage traces.
class MostRecentGarbageTraces : public GarbageTracesBuffer {
 public:
  MostRecentGarbageTraces(int size)
      : GarbageTracesBuffer(size),
        _garbage_traces_pos(0) {
  }

  virtual ~MostRecentGarbageTraces() {
  }

  virtual bool select_replacement(uint32_t* index) {
    *index = _garbage_traces_pos;
    _garbage_traces_pos = (_garbage_traces_pos + 1) % _size;
    return true;
  }

 private:
  // The current position in the buffer.
  uint32_t _garbage_traces_pos;
};

// Each object that we profile is stored as a trace with the thread_id.
class StackTraceStorage : public CHeapObj<mtInternal> {
 public:
  // Adds a trace to the list of traces we are maintaining.
  void add_trace(jvmtiStackTrace *trace, oop o);

  // Called by the client to retrieve the list of live stack traces.
  // Fills in the jvmtiStackTraces it is passed.
  void get_all_stack_traces(jvmtiStackTraces *traces);

  // Called by the client to retrieve the list of recent garbage stack
  // traces. Fills in the jvmtiStackTraces it is passed.
  void get_garbage_stack_traces(jvmtiStackTraces *traces);

  // Called by the client to retrieve the list of frequent garbage stack
  // traces. Fills in the jvmtiStackTraces it is passed.
  void get_frequent_garbage_stack_traces(jvmtiStackTraces *traces);

  // Executes whenever weak references are traversed. is_alive tells
  // whether a given oop is still reachable and live.
  size_t weak_oops_do(BoolObjectClosure* is_alive, OopClosure *f);

  ~StackTraceStorage();
  StackTraceStorage();

  static StackTraceStorage* storage() {
    if (internal_storage == NULL) {
      internal_storage = new StackTraceStorage();
    }
    return internal_storage;
  }

  static void reset_stack_trace_storage() {
    delete internal_storage;
    internal_storage = NULL;
  }

  bool is_initialized() {
    return _initialized;
  }

  const jvmtiHeapSamplingStats& get_heap_sampling_stats() const {
    return _stats;
  }

  // Static method to set the storage in place at initialization.
  static void initialize_stack_trace_storage(int max_storage) {
    reset_stack_trace_storage();
    StackTraceStorage *storage = StackTraceStorage::storage();
    storage->initialize_storage(max_storage);
  }

  void accumulate_sample_rate(size_t rate) {
    _stats.sample_rate_accumulation += rate;
    _stats.sample_rate_count++;
  }

  bool initialized() { return _initialized; }
  volatile bool *initialized_address() { return &_initialized; }

 private:
  // The traces currently sampled.
  GrowableArray<StackTraceData> *_allocated_traces;

  // Recent garbage traces.
  MostRecentGarbageTraces *_recent_garbage_traces;

  // Frequent garbage traces.
  FrequentGarbageTraces *_frequent_garbage_traces;

  // Heap sampling statistics.
  jvmtiHeapSamplingStats _stats;

  // Maximum amount of storage provided by the JVMTI call initialize_profiling.
  int _max_storage;

  static StackTraceStorage* internal_storage;
  volatile bool _initialized;

  // Support functions and classes for copying data to the external world.
  class StackTraceDataCopier {
   public:
    virtual int size() const = 0;
    virtual const StackTraceData *get(uint32_t i) const = 0;
  };

  class LiveStackTraceDataCopier : public StackTraceDataCopier {
   public:
    LiveStackTraceDataCopier(GrowableArray<StackTraceData> *data) :
        _data(data) {}
    int size() const { return _data ? _data->length() : 0; }
    const StackTraceData *get(uint32_t i) const { return _data->adr_at(i); }

   private:
    GrowableArray<StackTraceData> *_data;
  };

  class GarbageStackTraceDataCopier : public StackTraceDataCopier {
   public:
    GarbageStackTraceDataCopier(StackTraceData **data, int size) :
        _data(data), _size(size) {}
    int size() const { return _size; }
    const StackTraceData *get(uint32_t i) const { return _data[i]; }

   private:
    StackTraceData **_data;
    int _size;
  };

  // Instance initialization.
  void initialize_storage(int max_storage);

  // Copies from StackTraceData to jvmtiStackTrace.
  bool deep_copy(jvmtiStackTrace *to, const StackTraceData *from);

  // Creates a deep copy of the list of StackTraceData.
  void copy_stack_traces(const StackTraceDataCopier &copier,
                         jvmtiStackTraces *traces);

  void store_garbage_trace(const StackTraceData &trace);

  void free_garbage();
};

StackTraceStorage* StackTraceStorage::internal_storage;

// Statics for the sampler.
double HeapMonitoring::_log_table[1 << FastLogNumBits];
bool HeapMonitoring::_enabled;
AlwaysTrueClosure HeapMonitoring::_always_true;
jint HeapMonitoring::_monitoring_rate;

// Cheap random number generator state.
uint64_t HeapMonitoring::_rnd;

StackTraceStorage::StackTraceStorage() :
    _allocated_traces(NULL),
    _recent_garbage_traces(NULL),
    _frequent_garbage_traces(NULL),
    _max_storage(0),
    _initialized(false) {
  memset(&_stats, 0, sizeof(_stats));
}

void StackTraceStorage::free_garbage() {
  StackTraceData **recent_garbage = NULL;
  uint32_t recent_size = 0;

  StackTraceData **frequent_garbage = NULL;
  uint32_t frequent_size = 0;

  if (_recent_garbage_traces != NULL) {
    recent_garbage = _recent_garbage_traces->get_traces();
    recent_size = _recent_garbage_traces->size();
  }

  if (_frequent_garbage_traces != NULL) {
    frequent_garbage = _frequent_garbage_traces->get_traces();
    frequent_size = _frequent_garbage_traces->size();
  }

  // Simple solution since this happens at exit: go through the recent traces
  // and free any that are only referenced there.
  for (uint32_t i = 0; i < recent_size; i++) {
    StackTraceData *trace = recent_garbage[i];
    if (trace != NULL) {
      trace->references--;

      if (trace->references == 0) {
        StackTraceData::free_data(trace);
      }
    }
  }

  // Then go through the frequent traces and free those that are now only
  // referenced there.
  for (uint32_t i = 0; i < frequent_size; i++) {
    StackTraceData *trace = frequent_garbage[i];
    if (trace != NULL) {
      trace->references--;

      if (trace->references == 0) {
        StackTraceData::free_data(trace);
      }
    }
  }
}

StackTraceStorage::~StackTraceStorage() {
  delete _allocated_traces;

  free_garbage();
  delete _recent_garbage_traces;
  delete _frequent_garbage_traces;
  _initialized = false;
}

void StackTraceStorage::initialize_storage(int max_storage) {
  // In case multiple threads got locked and then got through one by one.
  if (_initialized) {
    return;
  }

  _allocated_traces = new (ResourceObj::C_HEAP, mtInternal)
      GrowableArray<StackTraceData>(128, true);

  _recent_garbage_traces = new MostRecentGarbageTraces(max_storage);
  _frequent_garbage_traces = new FrequentGarbageTraces(max_storage);

  _max_storage = max_storage;
  _initialized = true;
}

void StackTraceStorage::add_trace(jvmtiStackTrace *trace, oop o) {
  StackTraceData new_data(trace, o);
  _stats.sample_count++;
  _stats.stack_depth_accumulation += trace->frame_count;
  _allocated_traces->append(new_data);
}

size_t StackTraceStorage::weak_oops_do(BoolObjectClosure *is_alive,
                                       OopClosure *f) {
  size_t count = 0;
  if (is_initialized()) {
    int len = _allocated_traces->length();

    // Compact the oop traces. Moves the live oops to the beginning of the
    // growable array, potentially overwriting the dead ones.
    int curr_pos = 0;
    for (int i = 0; i < len; i++) {
      StackTraceData &trace = _allocated_traces->at(i);
      oop value = trace.obj;
      if ((value != NULL && Universe::heap()->is_in_reserved(value)) &&
          is_alive->do_object_b(value)) {
        // Update the oop to point to the new object if it is still alive.
        f->do_oop(&(trace.obj));

        // Copy the old trace, if it is still live.
        _allocated_traces->at_put(curr_pos++, trace);

        count++;
      } else {
        // If the old trace is no longer live, add it to the list of
        // recently collected garbage.
        store_garbage_trace(trace);
      }
    }

    // Zero out remaining array elements. Even though the call to trunc_to
    // below truncates these values, zeroing them out is good practice.
    StackTraceData zero_trace;
    for (int i = curr_pos; i < len; i++) {
      _allocated_traces->at_put(i, zero_trace);
    }

    // Set the array's length to the number of live elements.
    _allocated_traces->trunc_to(curr_pos);
  }

  return count;
}

bool StackTraceStorage::deep_copy(jvmtiStackTrace *to,
                                  const StackTraceData *from) {
  const jvmtiStackTrace *src = from->trace;
  *to = *src;

  to->frames = NEW_C_HEAP_ARRAY(jvmtiFrameInfo, MaxStackDepth, mtInternal);
  if (to->frames == NULL) {
    return false;
  }

  memcpy(to->frames, src->frames, sizeof(jvmtiFrameInfo) * MaxStackDepth);
  return true;
}

// Called by the outside world; returns a copy of the stack traces
// (because we could be replacing them as the user handles them).
// The array is secretly null-terminated (to make it easier to reclaim).
void StackTraceStorage::get_all_stack_traces(jvmtiStackTraces *traces) {
  LiveStackTraceDataCopier copier(_allocated_traces);
  copy_stack_traces(copier, traces);
}

// See comment on get_all_stack_traces.
void StackTraceStorage::get_garbage_stack_traces(jvmtiStackTraces *traces) {
  GarbageStackTraceDataCopier copier(_recent_garbage_traces->get_traces(),
                                     _recent_garbage_traces->size());
  copy_stack_traces(copier, traces);
}

// See comment on get_all_stack_traces.
void StackTraceStorage::get_frequent_garbage_stack_traces(
    jvmtiStackTraces *traces) {
  GarbageStackTraceDataCopier copier(_frequent_garbage_traces->get_traces(),
                                     _frequent_garbage_traces->size());
  copy_stack_traces(copier, traces);
}

void StackTraceStorage::copy_stack_traces(const StackTraceDataCopier &copier,
                                          jvmtiStackTraces *traces) {
  int len = copier.size();

  // Create a new array to store the StackTraceData objects.
  // + 1 for a NULL at the end.
  jvmtiStackTrace *t = NEW_C_HEAP_ARRAY(jvmtiStackTrace, len + 1, mtInternal);
  if (t == NULL) {
    traces->stack_traces = NULL;
    traces->trace_count = 0;
    return;
  }

  // +1 to have a NULL at the end of the array.
  memset(t, 0, (len + 1) * sizeof(*t));

  // Copy the StackTraceData objects into the new array.
  int trace_count = 0;
  for (int i = 0; i < len; i++) {
    const StackTraceData *stack_trace = copier.get(i);
    if (stack_trace != NULL) {
      jvmtiStackTrace *to = &t[trace_count];
      if (!deep_copy(to, stack_trace)) {
        continue;
      }
      trace_count++;
    }
  }

  traces->stack_traces = t;
  traces->trace_count = trace_count;
}

void StackTraceStorage::store_garbage_trace(const StackTraceData &trace) {
  StackTraceData *new_trace = new StackTraceData();
  *new_trace = trace;

  bool accepted = _recent_garbage_traces->store_trace(new_trace);

  // 'accepted' is on the right-hand side of the || so that store_trace is
  // always evaluated.
  accepted = _frequent_garbage_traces->store_trace(new_trace) || accepted;

  if (!accepted) {
    // No one wanted to use it.
    delete new_trace;
  }

  _stats.garbage_collected_samples++;
}

// Delegate the initialization question to the underlying storage system.
bool HeapMonitoring::initialized() {
  return StackTraceStorage::storage()->initialized();
}

// Delegate the initialization question to the underlying storage system.
bool *HeapMonitoring::initialized_address() {
  return const_cast<bool*>(StackTraceStorage::storage()->initialized_address());
}

void HeapMonitoring::get_live_traces(jvmtiStackTraces *traces) {
  StackTraceStorage::storage()->get_all_stack_traces(traces);
}

void HeapMonitoring::get_sampling_statistics(jvmtiHeapSamplingStats *stats) {
  const jvmtiHeapSamplingStats& internal_stats =
      StackTraceStorage::storage()->get_heap_sampling_stats();
  *stats = internal_stats;
}

void HeapMonitoring::get_frequent_garbage_traces(jvmtiStackTraces *traces) {
  StackTraceStorage::storage()->get_frequent_garbage_stack_traces(traces);
}

void HeapMonitoring::get_garbage_traces(jvmtiStackTraces *traces) {
  StackTraceStorage::storage()->get_garbage_stack_traces(traces);
}

void HeapMonitoring::release_traces(jvmtiStackTraces *traces) {
  jint trace_count = traces->trace_count;
  jvmtiStackTrace *stack_traces = traces->stack_traces;

  for (jint i = 0; i < trace_count; i++) {
    jvmtiStackTrace *current_trace = stack_traces + i;
    FREE_C_HEAP_ARRAY(jvmtiFrameInfo, current_trace->frames);
  }

  FREE_C_HEAP_ARRAY(jvmtiStackTrace, traces->stack_traces);
  traces->trace_count = 0;
  traces->stack_traces = NULL;
}

// Invoked by the GC to clean up old stack traces and remove old arrays
// of instrumentation that are still lying around.
size_t HeapMonitoring::weak_oops_do(BoolObjectClosure* is_alive,
                                    OopClosure *f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  return StackTraceStorage::storage()->weak_oops_do(is_alive, f);
}

void HeapMonitoring::initialize_profiling(jint monitoring_rate,
                                          jint max_storage) {
  // Ignore if already enabled.
  if (_enabled) {
    return;
  }
  _monitoring_rate = monitoring_rate;

  // Initialize and reset.
  StackTraceStorage::initialize_stack_trace_storage(max_storage);

  // Populate the lookup table for fast_log2.
  // This approximates the log2 curve with a step function.
  // Steps have height equal to log2 of the mid-point of the step.
  for (int i = 0; i < (1 << FastLogNumBits); i++) {
    double half_way = static_cast<double>(i + 0.5);
    _log_table[i] = (log(1.0 + half_way / (1 << FastLogNumBits)) / log(2.0));
  }

  JavaThread *t = static_cast<JavaThread *>(Thread::current());
  _rnd = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(t));
  if (_rnd == 0) {
    _rnd = 1;
  }
  _enabled = true;
}

void HeapMonitoring::stop_profiling() {
  _enabled = false;
}

// Generates a geometric variable with the specified mean (512K by default).
// This is done by generating a random number between 0 and 1 and applying
// the inverse cumulative distribution function for an exponential.
// Specifically: Let m be the inverse of the sample rate, then
// the probability distribution function is m*exp(-mx) so the CDF is
// p = 1 - exp(-mx), so
// q = 1 - p = exp(-mx)
// log_e(q) = -mx
// -log_e(q)/m = x
// log_2(q) * (-log_e(2) * 1/m) = x
// In the code, q is actually in the range 1 to 2**26, hence the -26 below.
void HeapMonitoring::pick_next_sample(size_t *ptr) {
  _rnd = next_random(_rnd);

  // Take the top 26 bits as the random number.
  // (This plus a 1<<58 sampling bound gives a max possible step of
  // 5194297183973780480 bytes. In this case,
  // for sample_parameter = 1<<19, the max possible step is
  // 9448372 bytes (24 bits).)
  const uint64_t prng_mod_power = 48;  // Number of bits in prng.

  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
  // under piii debug for some binaries.
  double q = static_cast<uint32_t>(_rnd >> (prng_mod_power - 26)) + 1.0;

  // Put the computed p-value through the CDF of a geometric.
  // For faster performance (saves ~1/20th of exec time), replace
  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705).
  // The value 26.000705 is used rather than 26 to compensate
  // for inaccuracies in FastLog2 which otherwise result in a
  // negative answer.
  double log_val = (fast_log2(q) - 26);
  size_t rate = static_cast<size_t>(
      (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_monitoring_rate)) + 1);
  *ptr = rate;

  StackTraceStorage::storage()->accumulate_sample_rate(rate);
}
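// Worked example for pick_next_sample (illustrative numbers, not part of the
// algorithm): with _monitoring_rate = 512 * 1024 and a draw where q is 2^25
// (so q / 2^26 == 0.5), log_val is -1 and the next sampling interval becomes
// 0.693 * 524288 + 1, roughly 363,409 bytes. A draw of q == 1 gives the
// maximum step of about 26 * ln(2) * 524288, roughly 9448369 bytes, in line
// with the bound quoted in the comment above. Averaged over draws, the
// interval is approximately exponentially distributed with mean
// _monitoring_rate.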

// Called from the interpreter and C1.
void HeapMonitoring::object_alloc_unsized(oopDesc* o) {
  JavaThread *thread = static_cast<JavaThread *>(Thread::current());
  object_alloc_do_sample(thread, o, o->size() << LogHeapWordSize);
}

void HeapMonitoring::object_alloc(oopDesc* o, intx byte_size) {
  JavaThread *thread = static_cast<JavaThread *>(Thread::current());
  assert(static_cast<intx>(o->size() << LogHeapWordSize) == byte_size,
         "Object size is incorrect.");
  object_alloc_do_sample(thread, o, byte_size);
}

// Called directly by C2.
void HeapMonitoring::object_alloc_do_sample(Thread *t, oopDesc *o,
                                            intx byte_size) {
#if defined(X86) || defined(PPC)
  JavaThread *thread = static_cast<JavaThread *>(t);
  if (StackTraceStorage::storage()->is_initialized()) {
    assert(t->is_Java_thread(), "non-Java thread passed to do_sample");

    jvmtiStackTrace *trace = NEW_C_HEAP_OBJ(jvmtiStackTrace, mtInternal);
    if (trace == NULL) {
      return;
    }

    jvmtiFrameInfo *frames =
        NEW_C_HEAP_ARRAY(jvmtiFrameInfo, MaxStackDepth, mtInternal);
    if (frames == NULL) {
      FREE_C_HEAP_OBJ(trace);
      return;
    }

    trace->frames = frames;
    trace->thread_id = SharedRuntime::get_java_tid(thread);
    trace->size = byte_size;
    trace->frame_count = 0;

    if (thread->has_last_Java_frame()) {  // just to be safe
      vframeStream vfst(thread, true);
      int count = 0;
      while (!vfst.at_end() && count < MaxStackDepth) {
        Method* m = vfst.method();
        frames[count].location = vfst.bci();
        frames[count].method = m->jmethod_id();
        count++;

        vfst.next();
      }
      trace->frame_count = count;
    }

    if (trace->frame_count > 0) {
      // Success!
      StackTraceStorage::storage()->add_trace(trace, o);
      return;
    }

    // Failure!
    FREE_C_HEAP_ARRAY(jvmtiFrameInfo, trace->frames);
    FREE_C_HEAP_OBJ(trace);
    return;
  } else {
    // There is something like 64K worth of allocation before the VM
    // initializes. This is just in the interests of not slowing down
    // startup.
    assert(t->is_Java_thread(), "non-Java thread passed to do_sample");
  }
#else
  Unimplemented();
#endif
}