/*
 * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "prims/forte.hpp"
#include "runtime/heapMonitoring.hpp"
#include "runtime/sharedRuntime.hpp"

const int kMaxStackDepth = 64;

// The resulting data, as they appear to the client.
struct StackTraceData : CHeapObj<mtInternal> {
  ASGCT_CallTrace *trace;
  intx byte_size;
  jlong thread_id;

  StackTraceData(ASGCT_CallTrace *t, intx size, jlong tid)
      : trace(t), byte_size(size), thread_id(tid) {}
};

// RAII class that acquires / releases a mux lock.
class MuxLocker {
 private:
  volatile intptr_t *_lock;
  const char *_name;

 public:
  MuxLocker(volatile intptr_t *lock, const char *name)
      : _lock(lock), _name(name) {
    Thread::muxAcquire(lock, name);
  }
  ~MuxLocker() {
    Thread::muxRelease(_lock);
  }
};

// Each object that we profile is stored as a trace with its thread_id.
class StackTraceStorage {
 public:
  // Adds a trace to the list of traces we are maintaining. trace is the
  // stack trace, and thread is the thread that did the allocation.
  void add_trace(ASGCT_CallTrace *trace, intx byte_size, Thread *thread);

  // Called by the client to retrieve the list of stack traces.
  // Mutates the jvmtiStackTraces that is passed in.
  void get_all_stack_traces(jvmtiStackTraces *traces);

  ~StackTraceStorage();
  StackTraceStorage();

  // The global storage. Not a global static because
  // StackTraceStorage isn't available at module-loading time.
  static StackTraceStorage* storage() {
    static StackTraceStorage storage;
    return &storage;
  }

  // Static method to set up the storage at initialization.
  static void InitializeStorage(int max_storage) {
    _max_storage = max_storage;
    StackTraceStorage *storage = StackTraceStorage::storage();
    storage->InitializeStorage();
  }

  // Protects the traces currently sampled (below).
  volatile intptr_t _allocated_traces_lock[1];

  // The currently allocated traces. A fixed-size ring buffer.
  // This is a temporary fix until the GC handlers are in place. Then this
  // becomes a growable array that is emptied as elements get garbage
  // collected.
  StackTraceData** _allocated_traces;

  // Maximum number of traces the ring buffer can hold.
  size_t _allocated_traces_size;

  // The current position in _allocated_traces (above).
  // This is a temporary fix until the GC handlers are in place. Then this
  // becomes a growable array that is emptied as elements get garbage
  // collected.
  int _allocated_traces_pos;

 private:
  // Maximum amount of storage provided by the JVMTI call initialize_profiling.
  static int _max_storage;
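  // Illustration of the ring buffer semantics above (hypothetical numbers):
  // with _max_storage == 4, add_trace() fills slots 0, 1, 2, 3, then wraps
  // back to slot 0; the previous occupant of a reused slot is deleted before
  // being replaced, so only the 4 most recent samples are retained.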
  // Instance initialization to thread-safely initialize the storage.
  void InitializeStorage();

  // Support functions and classes for copying data to the external
  // world.
  class StackTraceDataCopier {
   public:
    virtual int size() const = 0;
    virtual StackTraceData *get(int i) const = 0;
  };

  class LiveStackTraceDataCopier : public StackTraceDataCopier {
   public:
    LiveStackTraceDataCopier(StackTraceData **data, int size)
        : _data(data), _size(size) {}
    int size() const { return _size; }
    StackTraceData *get(int i) const { return _data[i]; }

   private:
    StackTraceData **_data;
    int _size;
  };

  // Copies from StackTraceData to jvmtiStackTrace.
  bool deep_copy(jvmtiStackTrace *to, StackTraceData *from);

  // Creates a deep copy of the list of StackTraceData.
  void copy_stack_traces(const StackTraceDataCopier &copier,
                         jvmtiStackTraces *traces);
};

// Statics for the sampler.
double HeapMonitoring::_log_table[1 << kFastlogNumBits];
bool HeapMonitoring::_initialized = false;
jint HeapMonitoring::_monitoring_rate;

// Cheap random number generator.
uint64_t HeapMonitoring::_rnd;

int StackTraceStorage::_max_storage;

StackTraceStorage::StackTraceStorage()
    : _allocated_traces(NULL),
      _allocated_traces_size(0),
      _allocated_traces_pos(0) {
  _allocated_traces_lock[0] = 0;
}

StackTraceStorage::~StackTraceStorage() {
  FREE_C_HEAP_ARRAY(StackTraceData*, _allocated_traces);
}

void StackTraceStorage::InitializeStorage() {
  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::InitializeStorage");
  _allocated_traces_size = _max_storage;
  _allocated_traces = NEW_C_HEAP_ARRAY(StackTraceData*,
                                       _allocated_traces_size, mtInternal);
  memset(_allocated_traces, 0,
         sizeof(*_allocated_traces) * _allocated_traces_size);
}

void StackTraceStorage::add_trace(ASGCT_CallTrace *trace, intx byte_size,
                                  Thread *thread) {
  StackTraceData *new_data =
      new StackTraceData(trace, byte_size, SharedRuntime::get_java_tid(thread));

  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::add_trace");
  StackTraceData *current_allocated_trace =
      _allocated_traces[_allocated_traces_pos];
  if (current_allocated_trace != NULL) {
    delete current_allocated_trace;
  }

  _allocated_traces[_allocated_traces_pos] = new_data;
  _allocated_traces_pos =
      (_allocated_traces_pos + 1) % _allocated_traces_size;
}

bool StackTraceStorage::deep_copy(jvmtiStackTrace *to, StackTraceData *from) {
  to->thread_id = from->thread_id;
  to->size = from->byte_size;

  // ASGCT_CallTrace is folded into jvmtiStackTrace.
  const ASGCT_CallTrace *src = from->trace;
  to->env_id = src->env_id;
  to->frame_count = src->num_frames;

  to->frames = NEW_C_HEAP_ARRAY(jvmtiCallFrame, kMaxStackDepth, mtInternal);
  if (to->frames == NULL) {
    return false;
  }

  // This supposes right now that ASGCT_CallFrame is the same size as the
  // jvmtiCallFrame structure. If not, we have to copy field by field, and
  // it means something is off between the two structures.
  if (sizeof(ASGCT_CallFrame) == sizeof(jvmtiCallFrame)) {
    memcpy(to->frames, src->frames, sizeof(ASGCT_CallFrame) * kMaxStackDepth);
  } else {
    for (int i = 0; i < to->frame_count; i++) {
      // Note we still have the hack where ASGCT piggybacks the bci via
      // the lineno field. This might change soon.
      to->frames[i].bci = src->frames[i].lineno;
      to->frames[i].method_id = src->frames[i].method_id;
    }
  }
  return true;
}
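// Note on the layout assumption in deep_copy() above (not verified at compile
// time here): the memcpy path relies on ASGCT_CallFrame and jvmtiCallFrame
// having identical layout, e.g. a jint (lineno / bci) followed by a
// jmethodID. A static assert on the member offsets would make this explicit.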
// Called by the outside world; returns a copy of the stack traces
// (because we could be replacing them as the user handles them).
// The array is secretly NULL-terminated (to make it easier to reclaim).
void StackTraceStorage::get_all_stack_traces(jvmtiStackTraces *traces) {
  LiveStackTraceDataCopier copier(_allocated_traces, _allocated_traces_size);
  copy_stack_traces(copier, traces);
}

void StackTraceStorage::copy_stack_traces(const StackTraceDataCopier &copier,
                                          jvmtiStackTraces *traces) {
  MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::copy_stack_traces");
  int len = copier.size();

  // Create a new array to store the StackTraceData objects.
  // + 1 for a NULL at the end.
  jvmtiStackTrace *t =
      NEW_C_HEAP_ARRAY(jvmtiStackTrace, len + 1, mtInternal);
  if (t == NULL) {
    traces->stack_traces = NULL;
    traces->trace_count = 0;
    return;
  }

  // +1 to have a NULL at the end of the array.
  memset(t, 0, (len + 1) * sizeof(*t));

  // Copy the StackTraceData objects into the new array.
  int trace_count = 0;
  for (int i = 0; i < len; i++) {
    StackTraceData *stack_trace = copier.get(i);
    if (stack_trace != NULL && stack_trace->trace != NULL) {
      jvmtiStackTrace *to = &t[trace_count];
      if (!deep_copy(to, stack_trace)) {
        continue;
      }
      trace_count++;
    }
  }

  traces->stack_traces = t;
  traces->trace_count = trace_count;
}

void HeapMonitoring::get_live_traces(jvmtiStackTraces *traces) {
  StackTraceStorage::storage()->get_all_stack_traces(traces);
}

void HeapMonitoring::release_traces(jvmtiStackTraces *trace_info) {
  jint trace_count = trace_info->trace_count;
  jvmtiStackTrace *traces = trace_info->stack_traces;
  for (jint i = 0; i < trace_count; i++) {
    jvmtiStackTrace *current_trace = traces + i;
    FREE_C_HEAP_ARRAY(ASGCT_CallFrame, current_trace->frames);
  }
  FREE_C_HEAP_ARRAY(jvmtiStackTrace, trace_info->stack_traces);
  trace_info->trace_count = 0;
  trace_info->stack_traces = NULL;
}

void HeapMonitoring::initialize_profiling(jint monitoring_rate,
                                          jint max_storage) {
  _monitoring_rate = monitoring_rate;
  StackTraceStorage::InitializeStorage(max_storage);

  // Populate the lookup table for fast_log2.
  // This approximates the log2 curve with a step function.
  // Steps have height equal to log2 of the mid-point of the step.
  for (int i = 0; i < (1 << kFastlogNumBits); i++) {
    double half_way = static_cast<double>(i) + 0.5;
    _log_table[i] =
        (log(1.0 + half_way / (1 << kFastlogNumBits)) / log(2.0));
  }

  JavaThread *t = reinterpret_cast<JavaThread *>(Thread::current());
  _rnd = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(t));
  if (_rnd == 0) {
    _rnd = 1;
  }
  for (int i = 0; i < 20; i++) {
    _rnd = next_random(_rnd);
  }
  _initialized = true;
}

// Generates a geometric variable with the specified mean (512K by default).
// This is done by generating a random number between 0 and 1 and applying
// the inverse cumulative distribution function for an exponential.
// Specifically: Let m be the inverse of the sample rate, then
// the probability distribution function is m*exp(-mx) so the CDF is
// p = 1 - exp(-mx), so
// q = 1 - p = exp(-mx)
// log_e(q) = -mx
// -log_e(q)/m = x
// log_2(q) * (-log_e(2) * 1/m) = x
// In the code, q is actually in the range 1 to 2**26, hence the -26 below.
void HeapMonitoring::pick_next_sample(JavaThread *t) {
  _rnd = next_random(_rnd);
  // Take the top 26 bits as the random number.
  // (This plus a 1<<58 sampling bound gives a max possible step of
  // 5194297183973780480 bytes. In this case,
  // for sample_parameter = 1<<19, max possible step is
  // 9448372 bytes (24 bits).)
  const uint64_t prng_mod_power = 48;  // Number of bits in prng.
  // The uint32_t cast is to prevent a (hard-to-reproduce) NaN
  // under piii debug for some binaries.
  double q = static_cast<uint32_t>(_rnd >> (prng_mod_power - 26)) + 1.0;
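  // Worked example of the computation below (illustrative numbers only):
  // with _monitoring_rate == 512K and q == 2^25, fast_log2(q) is ~25, so
  // log_val == -1 and bytes_until_sample ~= -1 * -log(2) * 512K ~= 363K.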
  // Put the computed p-value through the CDF of a geometric.
  // For faster performance (save ~1/20th of the exec time), replace
  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705).
  // The value 26.000705 is used rather than 26 to compensate
  // for inaccuracies in FastLog2 which otherwise result in a
  // negative answer.
  size_t *bytes_until_sample = t->bytes_until_sample();
  double log_val = (fast_log2(q) - 26);
  *bytes_until_sample = static_cast<size_t>(
      (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_monitoring_rate)) + 1);
}

// Called from the interpreter and C1.
void HeapMonitoring::object_alloc_unsized(oopDesc* o) {
  JavaThread *thread = reinterpret_cast<JavaThread *>(Thread::current());
  object_alloc_do_sample(thread, o, o->size() << LogHeapWordSize);
}

void HeapMonitoring::object_alloc(oopDesc* o, intx byte_size) {
  JavaThread *thread = reinterpret_cast<JavaThread *>(Thread::current());
  assert(static_cast<intx>(o->size() << LogHeapWordSize) == byte_size,
         "Object size is incorrect.");
  object_alloc_do_sample(thread, o, byte_size);
}

// Called directly by C2.
void HeapMonitoring::object_alloc_do_sample(Thread *t, oopDesc *o,
                                            intx byte_size) {
#if defined(X86) || defined(PPC)
  assert(t->is_Java_thread(), "non-Java thread passed to do_sample");
  JavaThread *thread = reinterpret_cast<JavaThread *>(t);
  if (StackTraceStorage::storage()->_allocated_traces) {
    pick_next_sample(thread);

    ASGCT_CallTrace *trace = NEW_C_HEAP_OBJ(ASGCT_CallTrace, mtInternal);
    if (trace == NULL) {
      return;
    }

    ASGCT_CallFrame *frames =
        NEW_C_HEAP_ARRAY(ASGCT_CallFrame, kMaxStackDepth, mtInternal);
    if (frames == NULL) {
      FreeHeap(reinterpret_cast<char *>(trace));
      return;
    }

    trace->frames = frames;
    trace->env_id = (JavaThread::current())->jni_environment();

    ucontext_t uc;
    if (!getcontext(&uc)) {
#if defined(IA32)
      // On Linux/x86 (but not x64), the JVM's AsyncGetCallTrace reads the
      // stack pointer from the REG_UESP field (as opposed to the
      // REG_ESP field). The kernel sets both the REG_UESP and REG_ESP
      // fields to the correct stack pointer for the ucontexts passed to
      // signal handlers. However, getcontext() sets only REG_ESP,
      // leaving REG_UESP uninitialized. Since there is no way to
      // distinguish where a ucontext_t came from, copy from REG_ESP to
      // REG_UESP so that AGCT will read the right stack pointer.
      uc.uc_mcontext.gregs[REG_UESP] = uc.uc_mcontext.gregs[REG_ESP];
#endif

      AsyncGetCallTrace(trace, kMaxStackDepth, &uc);
      if (trace->num_frames > 0) {
        // Success!
        StackTraceStorage::storage()->add_trace(trace, byte_size, thread);
        return;
      }
    }

    // Failure!
    FREE_C_HEAP_ARRAY(ASGCT_CallFrame, trace->frames);
    FreeHeap(reinterpret_cast<char *>(trace));
  } else {
    // There is something like 64K worth of allocation before the VM
    // initializes. This is just in the interests of not slowing down
    // startup.
    *(thread->bytes_until_sample()) = 65536;
  }
#else
  Unimplemented();
#endif
}
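// Usage sketch (illustrative only; process_trace() is a hypothetical
// client-side consumer, and the numeric arguments are arbitrary):
//
//   jvmtiStackTraces traces;
//   HeapMonitoring::initialize_profiling(512 * 1024, 200);
//   // ... sampled allocations accumulate in the ring buffer ...
//   HeapMonitoring::get_live_traces(&traces);
//   for (jint i = 0; i < traces.trace_count; i++) {
//     process_trace(&traces.stack_traces[i]);
//   }
//   HeapMonitoring::release_traces(&traces);  // frees the deep copies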