--- old/src/cpu/x86/vm/assembler_x86.hpp 2017-04-18 10:46:05.178578292 -0700
+++ new/src/cpu/x86/vm/assembler_x86.hpp 2017-04-18 10:46:05.054578777 -0700
@@ -2225,6 +2225,44 @@
_embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
}
+// This is shared between the interpreter and C1, and is needed in multiple
+// places in each. The code that invokes the actual sampling methods has
+// to be provided by the caller; thus, a macro.
+#define HEAP_MONITORING(ma, thread, var_size_in_bytes, con_size_in_bytes, object, t1, t2, sample_invocation) \
+do { \
+ { \
+ SkipIfEqual skip_if(ma, &HeapMonitor, 0); \
+ Label skip_sample; \
+ Register thr = thread; \
+ if (!thr->is_valid()) { \
+ NOT_LP64(assert(t1 != noreg, \
+ "Need temporary register for constants")); \
+ thr = NOT_LP64(t1) LP64_ONLY(r15_thread); \
+ NOT_LP64(ma -> get_thread(thr);) \
+ } \
+ /* Trigger heap monitoring event */ \
+ Address bus(thr, \
+ JavaThread::bytes_until_sample_offset()); \
+ \
+ if (var_size_in_bytes->is_valid()) { \
+ ma -> NOT_LP64(subl) LP64_ONLY(subq)(bus, var_size_in_bytes); \
+ } else { \
+ int csib = (con_size_in_bytes); \
+ assert(t2 != noreg, \
+ "Need temporary register for constants"); \
+ ma -> NOT_LP64(movl) LP64_ONLY(mov64)(t2, csib); \
+ ma -> NOT_LP64(subl) LP64_ONLY(subq)(bus, t2); \
+ } \
+ \
+ ma -> jcc(Assembler::positive, skip_sample); \
+ \
+ { \
+ sample_invocation \
+ } \
+ ma -> bind(skip_sample); \
+ } \
+} while(0)
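+
+// An illustrative invocation (a sketch only; the real call sites appear in
+// the C1 and interpreter changes below), for an allocation whose size is a
+// compile-time constant:
+//
+//   HEAP_MONITORING(this, noreg, noreg, instance_size_in_bytes, obj, t1, t2,
+//                   call(RuntimeAddress(Runtime1::entry_for(
+//                       Runtime1::heap_object_sample_id))););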
+
};
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
--- old/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2017-04-18 10:46:05.606576618 -0700
+++ new/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2017-04-18 10:46:05.486577088 -0700
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "assembler_x86.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "classfile/systemDictionary.hpp"
@@ -201,6 +202,10 @@
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
+
+ HEAP_MONITORING(this, noreg, noreg, object_size * HeapWordSize, obj,
+ t1, t2, call(RuntimeAddress(Runtime1::entry_for(
+ Runtime1::heap_object_sample_id))););
}
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
@@ -277,13 +282,19 @@
// clear rest of allocated space
const Register len_zero = len;
+  // initialize_body destroys arr_size, so remember it.
+ push(arr_size);
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
+ pop(arr_size);
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == rax, "must be");
call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
}
+ HEAP_MONITORING(this, noreg, arr_size, 0, obj, t1, noreg,
+ call(RuntimeAddress(Runtime1::entry_for(
+ Runtime1::heap_object_sample_id))););
verify_oop(obj);
}
--- old/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2017-04-18 10:46:05.998575086 -0700
+++ new/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2017-04-18 10:46:05.878575555 -0700
@@ -414,7 +414,8 @@
}
static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
- bool save_fpu_registers = true) {
+ bool save_fpu_registers = true,
+ bool do_generate_oop_map = true) {
__ block_comment("save_live_registers");
__ pusha(); // integer registers
@@ -489,7 +490,9 @@
// FPU stack must be empty now
__ verify_FPU(0, "save_live_registers");
- return generate_oop_map(sasm, num_rt_args, save_fpu_registers);
+ return do_generate_oop_map
+ ? generate_oop_map(sasm, num_rt_args, save_fpu_registers)
+ : NULL;
}
@@ -957,6 +960,24 @@
return oop_maps;
}
+static void heap_support_stub(StubAssembler* sasm, Register obj,
+ Register size_in_bytes, int con_size_in_bytes,
+ Register t1, Register t2) {
+ // Usually, when we invoke the sampling methods from within the client
+ // compiler, we do so in a stub. However, sometimes, we are already in a stub
+ // when we want to call these things, and stack trace gathering gets confused
+ // when you call a stub inside another stub.
+ HEAP_MONITORING(sasm, noreg, size_in_bytes, con_size_in_bytes, obj, t1, t2, \
+ { \
+ save_live_registers(sasm, 1, true, false); \
+ __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax)); \
+      __ call(RuntimeAddress( \
+ CAST_FROM_FN_PTR(address, \
+ HeapMonitoring::object_alloc_unsized))); \
+ NOT_LP64(__ pop(rax)); \
+ restore_live_registers(sasm); \
+ });
+}
OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
@@ -1042,6 +1063,7 @@
__ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
__ verify_oop(obj);
+ heap_support_stub(sasm, obj, obj_size, 0, t1, t2);
__ pop(rbx);
__ pop(rdi);
__ ret(0);
@@ -1170,8 +1192,12 @@
__ subptr(arr_size, t1); // body length
__ addptr(t1, obj); // body start
if (!ZeroTLAB) {
+    // initialize_body destroys arr_size, so remember it.
+ __ push(arr_size);
__ initialize_body(t1, arr_size, 0, t2);
+ __ pop(arr_size);
}
+ heap_support_stub(sasm, obj, arr_size, 0, t1, t2);
__ verify_oop(obj);
__ ret(0);
@@ -1504,6 +1530,22 @@
NOT_LP64(__ pop(rax));
restore_live_registers(sasm);
+ }
+ break;
+
+ case heap_object_sample_id:
+ { // rax,: object
+ StubFrame f(sasm, "heap_object_sample", dont_gc_arguments);
+ // We can't gc here so skip the oopmap but make sure that all
+ // the live registers get saved
+ save_live_registers(sasm, 1);
+
+ __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+ HeapMonitoring::object_alloc)));
+ NOT_LP64(__ pop(rax));
+
+ restore_live_registers(sasm);
}
break;
--- old/src/cpu/x86/vm/templateTable_x86.cpp 2017-04-18 10:46:06.406573491 -0700
+++ new/src/cpu/x86/vm/templateTable_x86.cpp 2017-04-18 10:46:06.290573944 -0700
@@ -3927,6 +3927,7 @@
// The object is initialized before the header. If the object size is
// zero, go directly to the header initialization.
__ bind(initialize_object);
+  __ movptr(rbx, rdx); // Save the size for HeapMonitoring
__ decrement(rdx, sizeof(oopDesc));
__ jcc(Assembler::zero, initialize_header);
@@ -3957,6 +3958,10 @@
// initialize object header only.
__ bind(initialize_header);
+
+ // Restore size for HeapMonitoring
+  __ movptr(rdx, rbx);
+
if (UseBiasedLocking) {
__ pop(rcx); // get saved klass back in the register.
__ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
@@ -3977,10 +3982,20 @@
// Trigger dtrace event for fastpath
__ push(atos);
__ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
+ rax, rdx);
__ pop(atos);
}
+ HEAP_MONITORING(_masm, noreg, rdx, 0, rax, rcx, noreg, \
+ { \
+ __ push(atos); \
+ __ call_VM_leaf( \
+ CAST_FROM_FN_PTR(address, HeapMonitoring::object_alloc), \
+ rax, rdx); \
+ __ pop(atos); \
+ });
+
__ jmp(done);
}
--- old/src/share/vm/c1/c1_Runtime1.cpp 2017-04-18 10:46:06.834571818 -0700
+++ new/src/share/vm/c1/c1_Runtime1.cpp 2017-04-18 10:46:06.730572224 -0700
@@ -202,6 +202,7 @@
switch (id) {
// These stubs don't need to have an oopmap
case dtrace_object_alloc_id:
+ case heap_object_sample_id:
case g1_pre_barrier_slow_id:
case g1_post_barrier_slow_id:
case slow_subtype_check_id:
--- old/src/share/vm/c1/c1_Runtime1.hpp 2017-04-18 10:46:07.222570300 -0700
+++ new/src/share/vm/c1/c1_Runtime1.hpp 2017-04-18 10:46:07.102570770 -0700
@@ -39,6 +39,7 @@
#define RUNTIME1_STUBS(stub, last_entry) \
stub(dtrace_object_alloc) \
+ stub(heap_object_sample) \
stub(unwind_exception) \
stub(forward_exception) \
stub(throw_range_check_failed) /* throws ArrayIndexOutOfBoundsException */ \
--- old/src/share/vm/gc/shared/collectedHeap.inline.hpp 2017-04-18 10:46:07.610568784 -0700
+++ new/src/share/vm/gc/shared/collectedHeap.inline.hpp 2017-04-18 10:46:07.482569284 -0700
@@ -33,6 +33,7 @@
#include "oops/arrayOop.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
+#include "runtime/heapMonitoring.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.inline.hpp"
#include "services/lowMemoryDetector.hpp"
@@ -81,6 +82,24 @@
SharedRuntime::dtrace_object_alloc(obj, size);
}
}
+
+ if (HeapMonitor) {
+ // support for object alloc event (no-op most of the time)
+ if (klass() != NULL && klass()->name() != NULL) {
+ Thread *base_thread = Thread::current();
+ if (base_thread->is_Java_thread()) {
+ JavaThread *thread = (JavaThread *) base_thread;
+ size_t *bytes_until_sample = thread->bytes_until_sample();
+ size_t size_in_bytes = ((size_t) size) << LogHeapWordSize;
+ assert(size > 0, "positive size");
+ if (*bytes_until_sample < size_in_bytes) {
+ HeapMonitoring::object_alloc_do_sample(thread, obj, size_in_bytes);
+ } else {
+ *bytes_until_sample -= size_in_bytes;
+ }
+ }
+ }
+ }
}
void CollectedHeap::post_allocation_setup_obj(KlassHandle klass,
--- old/src/share/vm/opto/macro.cpp 2017-04-18 10:46:07.978567345 -0700
+++ new/src/share/vm/opto/macro.cpp 2017-04-18 10:46:07.870567767 -0700
@@ -1126,6 +1126,75 @@
}
}
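+// The helper below emits, informally, the following shape into the graph
+// (a sketch; the names are conditional_sample's parameters):
+//
+//   if (should_sample <test> 0) {             // taken with 'probability'
+//     call(thread, fast_oop, size_in_bytes);  // leaf call into the sampler
+//   }
+//   // a RegionNode/PhiNode pair then merges control and raw memory back
+//   // onto the fast path
+//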
+void PhaseMacroExpand::conditional_sample(Node *should_sample,
+ BoolTest::mask test,
+ float probability,
+ CallLeafNode *call,
+ Node *thread,
+ Node **fast_oop_ctrl,
+ Node **fast_oop_rawmem,
+ Node **fast_oop,
+ Node *size_in_bytes,
+ Node *in_node) {
+ Node* sample_cmp = new CmpXNode(should_sample, _igvn.MakeConX(0));
+ transform_later(sample_cmp);
+
+ Node *sample_bool = new BoolNode(sample_cmp, test);
+ transform_later(sample_bool);
+
+ IfNode *sample_if = new IfNode(*fast_oop_ctrl,
+ sample_bool,
+ probability,
+ COUNT_UNKNOWN);
+ transform_later(sample_if);
+
+ // Slow-path call to sample
+ Node *sample_true = new IfTrueNode(sample_if);
+ transform_later(sample_true);
+
+ // Fast path to no sample
+ Node *sample_false = new IfFalseNode(sample_if);
+ transform_later(sample_false);
+
+ // Create postdominators for both the control and data flow paths.
+ Node *sample_region = new RegionNode(3);
+ Node *sample_phi_rawmem = new PhiNode(sample_region,
+ Type::MEMORY,
+ TypeRawPtr::BOTTOM);
+
+ sample_region->init_req(1, sample_false);
+ sample_phi_rawmem->init_req(1, *fast_oop_rawmem);
+
+ // Invoke the sampling method on the slow path.
+ int size = TypeFunc::Parms + 2;
+
+ call->init_req(TypeFunc::Parms+0, thread);
+ call->init_req(TypeFunc::Parms+1, *fast_oop);
+ call->init_req(TypeFunc::Parms+2, size_in_bytes);
+#ifdef _LP64
+  // The size is TypeX, so in a 64-bit JVM this is a long, and we need
+  // a second, dummy argument (an idiosyncrasy of C2).
+ call->init_req(TypeFunc::Parms+3, C->top());
+#endif
+ call->init_req( TypeFunc::Control, sample_true);
+ call->init_req( TypeFunc::I_O , top()); // does no i/o
+ call->init_req( TypeFunc::Memory , *fast_oop_rawmem );
+ call->init_req( TypeFunc::ReturnAdr, in_node->in(TypeFunc::ReturnAdr));
+ call->init_req( TypeFunc::FramePtr, in_node->in(TypeFunc::FramePtr));
+ transform_later(call);
+ Node *sample_oop_rawmem = new ProjNode(call, TypeFunc::Memory);
+ transform_later(sample_oop_rawmem);
+
+ // Tie the slow path to the postdominating node.
+ sample_region->init_req(2, sample_true);
+ sample_phi_rawmem->init_req(2, sample_oop_rawmem);
+ transform_later(sample_region);
+
+ *fast_oop_ctrl = sample_region;
+ *fast_oop_rawmem = sample_phi_rawmem;
+ transform_later(*fast_oop_rawmem);
+}
+
bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) {
// Don't do scalar replacement if the frame can be popped by JVMTI:
// if reallocation fails during deoptimization we'll pop all
@@ -1636,6 +1705,60 @@
transform_later(fast_oop_rawmem);
}
+ if (HeapMonitor) {
+ // Inlined version of HeapMonitoring::object_alloc_base
+ // Get base of thread-local storage area
+ Node* thread = new ThreadLocalNode();
+ transform_later(thread);
+
+ ByteSize sample_offset = JavaThread::bytes_until_sample_offset();
+
+ // Do test to see if we should sample.
+ // Get bytes_until_sample from thread local storage.
+ Node *bytes_until_sample = make_load(fast_oop_ctrl,
+ fast_oop_rawmem,
+ thread,
+ in_bytes(sample_offset),
+ TypeX_X,
+ TypeX_X->basic_type());
+
+ // new_bytes_until_sample = bytes_until_sample - size_in_bytes
+ Node *new_bytes_until_sample =
+ new SubXNode(bytes_until_sample, size_in_bytes);
+ transform_later(new_bytes_until_sample);
+
+ // bytes_until_sample = new_bytes_until_sample;
+ fast_oop_rawmem = make_store(fast_oop_ctrl,
+ fast_oop_rawmem,
+ thread,
+ in_bytes(sample_offset),
+ new_bytes_until_sample,
+ TypeX_X->basic_type());
+
+ // Call to make if sampling succeeds
+ int size = TypeFunc::Parms + 2;
+ CallLeafNode *call = new CallLeafNode(
+ OptoRuntime::heap_object_alloc_Type(),
+ CAST_FROM_FN_PTR(address,
+ HeapMonitoring::object_alloc_do_sample),
+ "object_alloc_do_sample",
+ TypeRawPtr::BOTTOM);
+
+ // if (new_bytes_until_sample < 0)
+ conditional_sample(new_bytes_until_sample,
+ BoolTest::le,
+ // Probability
+ // ~1/10000
+ PROB_UNLIKELY_MAG(4),
+ call,
+ thread,
+ &fast_oop_ctrl,
+ &fast_oop_rawmem,
+ &fast_oop,
+ size_in_bytes,
+ alloc);
+ }
+
// Plug in the successful fast-path into the result merge point
result_region ->init_req(fast_result_path, fast_oop_ctrl);
result_phi_rawoop->init_req(fast_result_path, fast_oop);
--- old/src/share/vm/opto/macro.hpp 2017-04-18 10:46:08.390565734 -0700
+++ new/src/share/vm/opto/macro.hpp 2017-04-18 10:46:08.274566187 -0700
@@ -66,6 +66,19 @@
Node* make_store(Node* ctl, Node* mem, Node* base, int offset,
Node* value, BasicType bt);
+ // For Heap-related sampling - will generate code to invoke call()
+ // if the given sampling parameters are true.
+ void conditional_sample(Node *should_sample,
+ BoolTest::mask test,
+ float probability,
+ CallLeafNode *call,
+ Node *thread,
+ Node **fast_oop_ctrl,
+ Node **fast_oop_rawmem,
+ Node **fast_oop,
+ Node* size_in_bytes,
+ Node *in_node);
+
// projections extracted from a call node
ProjNode *_fallthroughproj;
ProjNode *_fallthroughcatchproj;
--- old/src/share/vm/opto/runtime.cpp 2017-04-18 10:46:08.782564201 -0700
+++ new/src/share/vm/opto/runtime.cpp 2017-04-18 10:46:08.662564670 -0700
@@ -1558,6 +1558,28 @@
return TypeFunc::make(domain,range);
}
+const TypeFunc *OptoRuntime::heap_object_alloc_Type() {
+ // Keep it separate so that we don't have to worry if they change it.
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(3 LP64_ONLY( + 1));
+
+ // Thread-local storage
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM;
+ // oop; newly allocated object
+ fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL;
+ // byte size of object
+ fields[TypeFunc::Parms+2] = TypeX_X;
+ // other half of long length
+ LP64_ONLY(fields[TypeFunc::Parms+3] = Type::HALF);
+
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms + 3 LP64_ONLY( + 1), fields);
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ return TypeFunc::make(domain, range);
+}
JRT_ENTRY_NO_ASYNC(void, OptoRuntime::register_finalizer(oopDesc* obj, JavaThread* thread))
assert(obj->is_oop(), "must be a valid oop");
--- old/src/share/vm/opto/runtime.hpp 2017-04-18 10:46:09.182562637 -0700
+++ new/src/share/vm/opto/runtime.hpp 2017-04-18 10:46:09.062563106 -0700
@@ -329,6 +329,9 @@
static const TypeFunc* dtrace_method_entry_exit_Type();
static const TypeFunc* dtrace_object_alloc_Type();
+ // Heap sampling support
+ static const TypeFunc* heap_object_alloc_Type();
+
private:
static NamedCounter * volatile _named_counters;
--- old/src/share/vm/prims/forte.cpp 2017-04-18 10:46:09.558561167 -0700
+++ new/src/share/vm/prims/forte.cpp 2017-04-18 10:46:09.434561652 -0700
@@ -35,19 +35,6 @@
#include "runtime/vframe.hpp"
#include "runtime/vframeArray.hpp"
-// call frame copied from old .h file and renamed
-typedef struct {
- jint lineno; // line number in the source file
- jmethodID method_id; // method executed in this frame
-} ASGCT_CallFrame;
-
-// call trace copied from old .h file and renamed
-typedef struct {
- JNIEnv *env_id; // Env where trace was recorded
- jint num_frames; // number of frames in this trace
- ASGCT_CallFrame *frames; // frames
-} ASGCT_CallTrace;
-
// These name match the names reported by the forte quality kit
enum {
ticks_no_Java_frame = 0,
--- old/src/share/vm/prims/forte.hpp 2017-04-18 10:46:09.942559665 -0700
+++ new/src/share/vm/prims/forte.hpp 2017-04-18 10:46:09.830560104 -0700
@@ -34,4 +34,20 @@
// register internal VM stub
};
+// call frame copied from old .h file and renamed
+typedef struct {
+ jint lineno; // line number in the source file
+ jmethodID method_id; // method executed in this frame
+} ASGCT_CallFrame;
+
+// call trace copied from old .h file and renamed
+typedef struct {
+ JNIEnv *env_id; // Env where trace was recorded
+ jint num_frames; // number of frames in this trace
+ ASGCT_CallFrame *frames; // frames
+} ASGCT_CallTrace;
+
+extern "C"
+void AsyncGetCallTrace(ASGCT_CallTrace *trace, jint depth, void* ucontext);
+
#endif // SHARE_VM_PRIMS_FORTE_HPP
--- old/src/share/vm/prims/jvmti.xml 2017-04-18 10:46:10.310558227 -0700
+++ new/src/share/vm/prims/jvmti.xml 2017-04-18 10:46:10.202558649 -0700
@@ -11466,6 +11466,81 @@
+  <function id="StartHeapSampling">
+    <synopsis>Start Heap Sampling</synopsis>
+    <description>
+      Start the heap sampler in the JVM. The function provides, via its argument, the sampling
+      rate requested and will fill internal data structures with heap allocation samples. The
+      samples are obtained via the <functionlink id="GetLiveTraces"></functionlink> function.
+    </description>
+    <origin>new</origin>
+    <capabilities>
+    </capabilities>
+    <parameters>
+      <param id="monitoring_period">
+        <jint/>
+        <description>
+          The monitoring period used for sampling. The sampler will use a statistical approach to
+          provide in average sampling every <paramlink id="monitoring_period"/> allocated bytes.
+        </description>
+      </param>
+    </parameters>
+    <errors>
+      <error id="JVMTI_ERROR_ILLEGAL_ARGUMENT">
+        <paramlink id="monitoring_period"/> is less than zero.
+      </error>
+    </errors>
+  </function>
+
+  <function id="GetLiveTraces">
+    <synopsis>Get Live Traces</synopsis>
+    <typedef id="jvmtiStackTraceData" label="Live heap trace data">
+      <field id="trace">
+        <vmbuf><void/></vmbuf>
+        <description>
+          TODO(jcbeyler): Not sure if we should declare the type as non void and make a JVMTI type here.
+          It would be ASGCT_CallTrace and then would include a pointer to ASGCT_CallFrame. Would we refactor that code?
+        </description>
+      </field>
+      <field id="size">
+        <jlong/>
+        <description>
+          The size of the object allocation.
+        </description>
+      </field>
+      <field id="thread_id">
+        <jlong/>
+        <description>
+          The thread id number.
+        </description>
+      </field>
+    </typedef>
+    <description>
+      Get Live Heap Sampled traces. The fields of the <datalink id="jvmtiStackTraceData"/>
+      structure are filled in with details of the specified sampled allocation.
+    </description>
+    <origin>new</origin>
+    <capabilities>
+    </capabilities>
+    <parameters>
+      <param id="stack_traces">
+        <allocbuf outcount="num_traces"><struct>jvmtiStackTraceData</struct></allocbuf>
+        <description>
+          The stack trace array to be filled.
+        </description>
+      </param>
+      <param id="num_traces">
+        <outptr><jint/></outptr>
+        <description>
+          On output, size of the array returned via the first parameter.
+        </description>
+      </param>
+    </parameters>
+    <errors>
+    </errors>
+  </function>
+
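
For orientation, an agent built against a JDK carrying this patch could drive
the two new entry points roughly as follows. This is a sketch only: error
handling is elided, and it assumes a jvmti.h regenerated from the XML above,
so that StartHeapSampling, GetLiveTraces and jvmtiStackTraceData are declared.

    #include <jvmti.h>

    static void sample_and_dump(jvmtiEnv *jvmti) {
      // ~512KB average sampling period (the HeapMonitorRate default).
      jvmti->StartHeapSampling(1 << 19);

      // ... let the application run and allocate ...

      jvmtiStackTraceData *traces = NULL;
      jint num_traces = 0;
      jvmti->GetLiveTraces(&traces, &num_traces);
      for (jint i = 0; i < num_traces; i++) {
        // traces[i].size is the sampled allocation size in bytes;
        // traces[i].thread_id identifies the allocating thread;
        // traces[i].trace points at the recorded (ASGCT-style) stack.
      }
    }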
--- old/src/share/vm/prims/jvmtiEnv.cpp 2017-04-18 10:46:10.962555678 -0700
+++ new/src/share/vm/prims/jvmtiEnv.cpp 2017-04-18 10:46:10.842556147 -0700
@@ -46,6 +46,7 @@
#include "prims/jvmtiCodeBlobEvents.hpp"
#include "prims/jvmtiExtensions.hpp"
#include "prims/jvmtiGetLoadedClasses.hpp"
+#include "prims/jvmtiHeapTransition.hpp"
#include "prims/jvmtiImpl.hpp"
#include "prims/jvmtiManageCapabilities.hpp"
#include "prims/jvmtiRawMonitor.hpp"
@@ -1935,6 +1936,30 @@
return JVMTI_ERROR_NONE;
} /* end IterateOverInstancesOfClass */
+// Start the sampler.
+jvmtiError
+JvmtiEnv::StartHeapSampling(jint monitoring_period) {
+ if (monitoring_period < 0) {
+ return JVMTI_ERROR_ILLEGAL_ARGUMENT;
+ }
+
+ HeapMonitor = true;
+ HeapThreadTransition htt(Thread::current());
+ HeapMonitoring::initialize_profiling(monitoring_period);
+ return JVMTI_ERROR_NONE;
+} /* end StartHeapSampling */
+
+// Get the currently live sampled allocations.
+jvmtiError
+JvmtiEnv::GetLiveTraces(jvmtiStackTraceData **stack_traces, jint *num_traces) {
+ HeapThreadTransition htt(Thread::current());
+ if (stack_traces == NULL || num_traces == NULL) {
+ return JVMTI_ERROR_ILLEGAL_ARGUMENT;
+ }
+
+ HeapMonitoring::get_live_traces(stack_traces, num_traces);
+ return JVMTI_ERROR_NONE;
+} /* end GetLiveTraces */
//
// Local Variable functions
--- old/src/share/vm/runtime/arguments.cpp 2017-04-18 10:46:11.414553910 -0700
+++ new/src/share/vm/runtime/arguments.cpp 2017-04-18 10:46:11.286554411 -0700
@@ -4620,6 +4620,10 @@
}
#endif
+#if !(defined(X86) || defined(PPC64))
+ HeapMonitor = false;
+#endif
+
return JNI_OK;
}
--- old/src/share/vm/runtime/globals.hpp 2017-04-18 10:46:11.882552081 -0700
+++ new/src/share/vm/runtime/globals.hpp 2017-04-18 10:46:11.750552596 -0700
@@ -4082,7 +4082,13 @@
diagnostic(bool, CompilerDirectivesPrint, false, \
"Print compiler directives on installation.") \
diagnostic(int, CompilerDirectivesLimit, 50, \
- "Limit on number of compiler directives.")
+ "Limit on number of compiler directives.") \
+ product(bool, HeapMonitor, false, \
+ "Enable heap monitoring.") \
+ product(int, HeapMonitorRate, (1<<19), \
+ "Heap monitoring rate.") \
+ product(uintx, MaxHeapTraces, 200, \
+ "Maximum number of traces kept by the heap monitoring.") \
/*
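
For reference, the flags above would presumably be exercised along these lines
(524288 is 1<<19, the HeapMonitorRate default):

    java -XX:+HeapMonitor -XX:HeapMonitorRate=524288 -XX:MaxHeapTraces=200 MyApp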
--- old/src/share/vm/runtime/init.cpp 2017-04-18 10:46:12.326550344 -0700
+++ new/src/share/vm/runtime/init.cpp 2017-04-18 10:46:12.210550798 -0700
@@ -32,6 +32,8 @@
#include "prims/methodHandles.hpp"
#include "runtime/globals.hpp"
#include "runtime/handles.inline.hpp"
+#include "runtime/heapMonitoring.hpp"
+#include "prims/jvmtiHeapTransition.hpp"
#include "runtime/icache.hpp"
#include "runtime/init.hpp"
#include "runtime/safepoint.hpp"
@@ -155,6 +157,11 @@
CommandLineFlags::printFlags(tty, false, PrintFlagsRanges);
}
+ if (HeapMonitor) {
+ fprintf(stderr, "Starting sampling with rate %d\n", HeapMonitorRate);
+ HeapThreadTransition htt(Thread::current());
+ HeapMonitoring::initialize_profiling(HeapMonitorRate);
+ }
return JNI_OK;
}
--- old/src/share/vm/runtime/thread.cpp 2017-04-18 10:46:12.726548780 -0700
+++ new/src/share/vm/runtime/thread.cpp 2017-04-18 10:46:12.586549328 -0700
@@ -1480,6 +1480,7 @@
_do_not_unlock_if_synchronized = false;
_cached_monitor_info = NULL;
_parker = Parker::Allocate(this);
+ _bytes_until_sample = 0;
#ifndef PRODUCT
_jmp_ring_index = 0;
--- old/src/share/vm/runtime/thread.hpp 2017-04-18 10:46:13.186546982 -0700
+++ new/src/share/vm/runtime/thread.hpp 2017-04-18 10:46:13.070547435 -0700
@@ -815,6 +815,9 @@
JavaFrameAnchor _anchor; // Encapsulation of current java frame and it state
+ size_t _bytes_until_sample; // Thread local counter to determine when to sample
+ // allocations.
+
ThreadFunction _entry_point;
JNIEnv _jni_environment;
@@ -1102,6 +1105,9 @@
address last_Java_pc(void) { return _anchor.last_Java_pc(); }
+ // Bytes until next heap sample.
+ size_t* bytes_until_sample() { return &_bytes_until_sample; }
+
// Safepoint support
#if !(defined(PPC64) || defined(AARCH64))
JavaThreadState thread_state() const { return _thread_state; }
@@ -1554,6 +1560,7 @@
static ByteSize frame_anchor_offset() {
return byte_offset_of(JavaThread, _anchor);
}
+ static ByteSize bytes_until_sample_offset() { return byte_offset_of(JavaThread, _bytes_until_sample); }
static ByteSize callee_target_offset() { return byte_offset_of(JavaThread, _callee_target); }
static ByteSize vm_result_offset() { return byte_offset_of(JavaThread, _vm_result); }
static ByteSize vm_result_2_offset() { return byte_offset_of(JavaThread, _vm_result_2); }
--- /dev/null 2017-04-17 13:03:13.666114673 -0700
+++ new/src/share/vm/prims/jvmtiHeapTransition.hpp 2017-04-18 10:46:13.470545871 -0700
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_PRIMS_JVMTIHEAPSAMPLING_HPP
+#define SHARE_VM_PRIMS_JVMTIHEAPSAMPLING_HPP
+
+// TODO(jcbeyler): is there a better/standard JVM way of doing this?
+// A RAII class that handles transitions from the agent into the VM.
+class HeapThreadTransition : StackObj {
+ private:
+ JavaThreadState _saved_state;
+ JavaThread *_jthread;
+
+ public:
+ // Transitions this thread from the agent (thread_in_native) to the VM.
+ HeapThreadTransition(Thread *thread) {
+ if (thread->is_Java_thread()) {
+ _jthread = (JavaThread *)thread;
+ _saved_state = _jthread->thread_state();
+ if (_saved_state == _thread_in_native) {
+ ThreadStateTransition::transition_from_native(_jthread, _thread_in_vm);
+ } else {
+ ThreadStateTransition::transition(_jthread,
+ _saved_state,
+ _thread_in_vm);
+ }
+ } else {
+ _jthread = NULL;
+ _saved_state = _thread_new;
+ }
+ }
+
+ // Transitions this thread back to the agent from the VM.
+ ~HeapThreadTransition() {
+ if (_jthread != NULL) {
+ ThreadStateTransition::transition(_jthread, _thread_in_vm, _saved_state);
+ }
+ }
+};
+
+#endif // SHARE_VM_PRIMS_JVMTIHEAPSAMPLING_HPP
--- /dev/null 2017-04-17 13:03:13.666114673 -0700
+++ new/src/share/vm/runtime/heapMonitoring.cpp 2017-04-18 10:46:13.818544511 -0700
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "prims/forte.hpp"
+#include "runtime/heapMonitoring.hpp"
+
+const int kMaxStackDepth = 64;
+
+// The resulting data, as they appear to the client.
+// TODO(jcbeyler): should we make this into a JVMTI structure?
+struct StackTraceData {
+ ASGCT_CallTrace *trace;
+ intx byte_size;
+ jlong thread_id;
+
+ StackTraceData(ASGCT_CallTrace *t, intx size, jlong tid) : trace(t),
+ byte_size(size), thread_id(tid) {}
+};
+
+// RAII class that acquires / releases lock
+class MuxLocker {
+ private:
+ volatile intptr_t *_lock;
+ const char *_name;
+ public:
+ MuxLocker(volatile intptr_t *lock, const char *name) :
+ _lock(lock),
+ _name(name) {
+ Thread::muxAcquire(lock, name);
+ }
+ ~MuxLocker() {
+ Thread::muxRelease(_lock);
+ }
+};
+
+// Each object that we profile is stored as trace with the thread_id.
+class StackTraceStorage {
+ public:
+ // The function that gets called to add a trace to the list of
+ // traces we are maintaining. trace is the stacktrace, and thread
+ // is the thread that did the allocation.
+ void add_trace(ASGCT_CallTrace *trace, intx byte_size, Thread *thread);
+
+ // The function that gets called by the client to retrieve the list
+ // of stack traces. Passes (by reference) a pointer to a list of
+ // traces, and a number of traces, both of which will get mutated by
+ // the function being called.
+ void get_all_stack_traces(jvmtiStackTraceData **traces, jint *num_traces);
+
+ ~StackTraceStorage();
+ StackTraceStorage();
+
+ // The global storage. Not a global static because
+ // StackTraceStorage isn't available at module-loading time.
+ static StackTraceStorage *storage() {
+ static StackTraceStorage storage;
+ return &storage;
+ }
+
+ // Protects the traces currently sampled (below).
+ volatile intptr_t _allocated_traces_lock[1];
+
+ // The current allocated traces. A fixed-size ring buffer.
+  // This is a temporary fix until the GC handlers are in place. Then this
+ // becomes a growable array that is emptied as elements get garbage
+ // collected.
+ StackTraceData** _allocated_traces;
+
+  // Maximum number of traces kept, i.e. the capacity of _allocated_traces.
+ size_t _allocated_traces_size;
+
+ // TODO(jcbeyler): remove extra code that is here for statistics...
+ size_t _allocated_count;
+
+ // The current position in _allocated_traces (above);
+  // This is a temporary fix until the GC handlers are in place. Then this
+ // becomes a growable array that is emptied as elements get garbage
+ // collected.
+ int _allocated_traces_pos;
+
+ private:
+ // Support functions and classes for copying data to the external
+ // world.
+ class StackTraceDataCopier {
+ public:
+ virtual int size() const = 0;
+ virtual StackTraceData *get(int i) const = 0;
+ };
+
+ class LiveStackTraceDataCopier : public StackTraceDataCopier {
+ public:
+ LiveStackTraceDataCopier(StackTraceData **data, int size) :
+ _data(data), _size(size) {}
+ int size() const { return _size; }
+ StackTraceData *get(int i) const { return _data[i]; }
+
+ private:
+ StackTraceData **_data;
+ int _size;
+ };
+
+  // Copies a StackTraceData (from) into a jvmtiStackTraceData (to).
+ bool deep_copy(jvmtiStackTraceData *to, StackTraceData *from);
+
+ // Creates a deep copy of the list of StackTraceData
+ void copy_stack_traces(const StackTraceDataCopier &copier,
+ jvmtiStackTraceData **traces,
+ jint *num_traces);
+};
+
+// Statics for Sampler
+double HeapMonitoring::_log_table[1 << kFastlogNumBits];
+
+bool HeapMonitoring::_initialized = false;
+
+jint HeapMonitoring::_monitoring_period;
+
+// Cheap random number generator
+uint64_t HeapMonitoring::_rnd;
+
+StackTraceStorage::StackTraceStorage() :
+ _allocated_traces(new StackTraceData*[MaxHeapTraces]),
+ _allocated_traces_size(MaxHeapTraces),
+    _allocated_count(0),
+    _allocated_traces_pos(0) {
+ memset(_allocated_traces, 0, sizeof(*_allocated_traces) * MaxHeapTraces);
+ _allocated_traces_lock[0] = 0;
+}
+
+StackTraceStorage::~StackTraceStorage() {
+ delete[] _allocated_traces;
+}
+
+void StackTraceStorage::add_trace(ASGCT_CallTrace *trace,
+ intx byte_size,
+ Thread *thread) {
+ StackTraceData *new_data =
+ new StackTraceData(trace, byte_size, SharedRuntime::get_java_tid(thread));
+
+ MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::add_trace");
+ StackTraceData *current_allocated_trace =
+ _allocated_traces[_allocated_traces_pos];
+ if (current_allocated_trace != NULL) {
+ delete current_allocated_trace;
+ }
+ _allocated_traces[_allocated_traces_pos] = new_data;
+ _allocated_traces_pos = (_allocated_traces_pos + 1) % _allocated_traces_size;
+ _allocated_count++;
+}
+
+bool StackTraceStorage::deep_copy(jvmtiStackTraceData *to,
+ StackTraceData *from) {
+ to->thread_id = from->thread_id;
+ to->size = from->byte_size;
+
+  ASGCT_CallTrace *dest = reinterpret_cast<ASGCT_CallTrace *>(
+ os::malloc(sizeof(ASGCT_CallTrace), mtInternal));
+ if (dest == NULL) {
+ return false;
+ }
+ to->trace = dest;
+
+ const ASGCT_CallTrace *src =
+      reinterpret_cast<const ASGCT_CallTrace *>(from->trace);
+ *dest = *src;
+
+  dest->frames = reinterpret_cast<ASGCT_CallFrame *>(
+ os::malloc(sizeof(ASGCT_CallFrame) * kMaxStackDepth, mtInternal));
+ if (dest->frames == NULL) {
+ return false;
+ }
+ memcpy(dest->frames,
+ src->frames,
+ sizeof(ASGCT_CallFrame) * kMaxStackDepth);
+ return true;
+}
+
+// Called by the outside world; returns a copy of the stack traces
+// (because we could be replacing them as the user handles them).
+// The array is secretly null-terminated (to make it easier to reclaim).
+void StackTraceStorage::get_all_stack_traces(jvmtiStackTraceData **traces,
+ jint *num_traces) {
+ LiveStackTraceDataCopier copier(_allocated_traces, _allocated_traces_size);
+ copy_stack_traces(copier, traces, num_traces);
+}
+
+void StackTraceStorage::copy_stack_traces(const StackTraceDataCopier &copier,
+ jvmtiStackTraceData **traces,
+ jint *num_traces) {
+ MuxLocker mu(_allocated_traces_lock, "StackTraceStorage::copy_stack_traces");
+ int len = copier.size();
+  // TODO(jcbeyler): change the +2 below to +1 for len, and remove this extra
+  // code handling the extra trace used for counting; it is only for statistics.
+ // Create a new array to store the StackTraceData objects.
+  jvmtiStackTraceData *t = reinterpret_cast<jvmtiStackTraceData *>(
+ os::malloc((len + 2) * sizeof(*t), mtInternal));
+ if (t == NULL) {
+ *traces = NULL;
+ *num_traces = 0;
+ return;
+ }
+  // TODO(jcbeyler): change the +2 below to +1 for len, and remove this extra
+  // code handling the extra trace used for counting; it is only for statistics.
+ // +1 to have a NULL at the end of the array.
+ memset(t, 0, (len + 2) * sizeof(*t));
+
+ // TODO(jcbeyler): remove this extra code handling the extra trace for
+ // counting, it is for statistics.
+ jvmtiStackTraceData *to = &t[0];
+ to->size = _allocated_count;
+
+ // Copy the StackTraceData objects into the new array.
+ int trace_count = 1;
+ for (int i = 0; i < len; i++) {
+ StackTraceData *stack_trace = copier.get(i);
+ if (stack_trace != NULL && stack_trace->trace != NULL) {
+ jvmtiStackTraceData *to = &t[trace_count];
+ if (!deep_copy(to, stack_trace)) {
+ continue;
+ }
+ trace_count++;
+ }
+ }
+
+ *traces = t;
+ *num_traces = trace_count;
+}
+
+void HeapMonitoring::get_live_traces(jvmtiStackTraceData **traces,
+ jint *num_traces) {
+ StackTraceStorage::storage()->get_all_stack_traces(traces, num_traces);
+}
+
+// TODO(jcbeyler): find out if the algorithm for determining the sampling can be
+// upstreamed.
+void HeapMonitoring::initialize_profiling(jint monitoring_period) {
+ _monitoring_period = monitoring_period;
+
+ // Populate the lookup table for fast_log2.
+ // This approximates the log2 curve with a step function.
+ // Steps have height equal to log2 of the mid-point of the step.
+ for (int i = 0; i < (1 << kFastlogNumBits); i++) {
+    double half_way = static_cast<double>(i + 0.5);
+ _log_table[i] = (log(1.0 + half_way / (1 << kFastlogNumBits)) / log(2.0));
+ }
+
+  JavaThread *t = reinterpret_cast<JavaThread *>(Thread::current());
+  _rnd = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(t));
+ if (_rnd == 0) {
+ _rnd = 1;
+ }
+ for (int i = 0; i < 20; i++) {
+ _rnd = next_random(_rnd);
+ }
+ _initialized = true;
+}
+
+// Generates a geometric variable with the specified mean (512K by default).
+// This is done by generating a random number between 0 and 1 and applying
+// the inverse cumulative distribution function for an exponential.
+// Specifically: Let m be the inverse of the sample period, then
+// the probability distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below
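+//
+// As a quick sanity check on the arithmetic (with the default
+// sample_parameter of 1<<19): q = 2**26 gives log_2(q) = 26 and hence
+// x = 0, while q = 2**25 gives x = (26 - 25) * log_e(2) * 2**19, roughly
+// 363K bytes; averaged over uniform q this reproduces the configured
+// 512K mean period.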
+void HeapMonitoring::pick_next_sample(JavaThread *t) {
+ _rnd = next_random(_rnd);
+ // Take the top 26 bits as the random number
+ // (This plus a 1<<58 sampling bound gives a max possible step of
+ // 5194297183973780480 bytes. In this case,
+ // for sample_parameter = 1<<19, max possible step is
+  // 9448372 bytes (24 bits).)
+ const uint64_t prng_mod_power = 48; // Number of bits in prng
+ // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+ // under piii debug for some binaries.
+  double q = static_cast<double>(_rnd >> (prng_mod_power - 26)) + 1.0;
+ // Put the computed p-value through the CDF of a geometric.
+ // For faster performance (save ~1/20th exec time), replace
+ // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
+ // The value 26.000705 is used rather than 26 to compensate
+ // for inaccuracies in FastLog2 which otherwise result in a
+ // negative answer.
+ size_t *bytes_until_sample = t->bytes_until_sample();
+ double log_val = (fast_log2(q) - 26);
+  *bytes_until_sample = static_cast<size_t>(
+ (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_monitoring_period)) + 1);
+}
+
+// Called from the interpreter and C1
+void HeapMonitoring::object_alloc_unsized(oopDesc* o) {
+  JavaThread *thread = reinterpret_cast<JavaThread *>(Thread::current());
+  object_alloc_do_sample(thread, o, o->size() << LogHeapWordSize);
+}
+
+void HeapMonitoring::object_alloc(oopDesc* o, intx byte_size) {
+  JavaThread *thread = reinterpret_cast<JavaThread *>(Thread::current());
+ object_alloc_do_sample(thread, o, byte_size);
+}
+
+// Called directly by C2
+void HeapMonitoring::object_alloc_do_sample(Thread *t, oopDesc *o, intx byte_size) {
+#if defined(X86) || defined(PPC64)
+  // TODO(jcbeyler): what about this?
+  assert(t->is_Java_thread(), "non-Java thread passed to do_sample");
+  JavaThread *thread = reinterpret_cast<JavaThread *>(t);
+  if (_initialized) {
+
+ pick_next_sample(thread);
+
+ ASGCT_CallTrace *trace = NEW_C_HEAP_OBJ(ASGCT_CallTrace, mtInternal);
+ if (trace == NULL) {
+ return;
+ }
+
+ ASGCT_CallFrame *frames =
+ NEW_C_HEAP_ARRAY(ASGCT_CallFrame, kMaxStackDepth, mtInternal);
+ if (frames == NULL) {
+      FreeHeap(reinterpret_cast<void *>(trace));
+ return;
+ }
+
+ trace->frames = frames;
+ trace->env_id = (JavaThread::current())->jni_environment();
+
+ ucontext_t uc;
+ if (!getcontext(&uc)) {
+#if defined(IA32)
+ // On Linux/x86 (but not x64), AsyncGetCallTrace/JVM reads the
+ // stack pointer from the REG_UESP field (as opposed to the
+ // REG_ESP field). The kernel sets both the REG_UESP and REG_ESP
+ // fields to the correct stack pointer for the ucontexts passed to
+ // signal handlers. However, getcontext() sets only REG_ESP,
+ // leaving REG_UESP uninitialized. Since there is no way to
+ // distinguish where a ucontext_t came from, copy from REG_ESP to
+ // REG_UESP so that AGCT will read the right stack pointer.
+ uc.uc_mcontext.gregs[REG_UESP] = uc.uc_mcontext.gregs[REG_ESP];
+#endif
+
+ AsyncGetCallTrace(trace, kMaxStackDepth, &uc);
+
+ if (trace->num_frames > 0) {
+ // Success!
+ StackTraceStorage::storage()->add_trace(trace, byte_size, thread);
+ return;
+ }
+ }
+ // Failure!
+ FREE_C_HEAP_ARRAY(ASGCT_CallFrame, trace->frames);
+    FreeHeap(reinterpret_cast<void *>(trace));
+ return;
+ } else {
+ // There is something like 64K worth of allocation before the VM
+ // initializes. This is just in the interests of not slowing down
+ // startup.
+ *(thread->bytes_until_sample()) = 65536;
+ }
+#else
+ Unimplemented();
+#endif
+}
--- /dev/null 2017-04-17 13:03:13.666114673 -0700
+++ new/src/share/vm/runtime/heapMonitoring.hpp 2017-04-18 10:46:14.166543150 -0700
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, Google and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_HEAPMONITORING_HPP
+#define SHARE_VM_RUNTIME_HEAPMONITORING_HPP
+
+#include "gc/shared/referenceProcessor.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+// Support class for sampling heap allocations across the VM.
+class HeapMonitoring {
+ private:
+ // Cheap random number generator
+ static uint64_t _rnd;
+ static bool _initialized;
+ static jint _monitoring_period;
+
+ // Statics for the fast log
+ static const int kFastlogNumBits = 10;
+ static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
+  static double _log_table[1 << kFastlogNumBits];
+
+  // Picks the next sample interval for the given thread.
+  static void pick_next_sample(JavaThread *t);
+
+  // Returns the next value from a cheap affine PRNG:
+  // rnd' = (prng_mult * rnd + prng_add) mod 2**prng_mod_power,
+  // using the classic lrand48-style constants.
+  static uint64_t next_random(uint64_t rnd) {
+    const uint64_t prng_mult = 0x5DEECE66DLL;
+    const uint64_t prng_add = 0xB;
+    const uint64_t prng_mod_power = 48;
+    const uint64_t prng_mod_mask =
+        ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+ return (prng_mult * rnd + prng_add) & prng_mod_mask;
+ }
+
+ // TODO(jcbeyler): is this algorithm acceptable in open source?
+ // Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
+ // This mimics the VeryFastLog2 code in those files
+ static inline double fast_log2(const double & d) {
+    assert(d > 0, "bad value passed to fast_log2");
+ uint64_t x = 0;
+ memcpy(&x, &d, sizeof(uint64_t));
+ const uint32_t x_high = x >> 32;
+ const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
+ const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
+ return exponent + _log_table[y];
+ }
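+  // Informal example: for d = 8.0 the exponent field decodes to 3 and the
+  // table index to 0, so fast_log2 returns about 3.0007; each table entry
+  // approximates the log2 of its step's mid-point rather than its left edge.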
+
+ public:
+ static void get_live_traces(jvmtiStackTraceData** stack_traces, jint* num_traces);
+ static void initialize_profiling(jint monitoring_period);
+
+ // Called when o is allocated, called by interpreter and C1.
+ static void object_alloc_unsized(oopDesc* o);
+ static void object_alloc(oopDesc* o, intx byte_size);
+
+ // Called when o is allocated from C2 directly,
+ // we know the thread, and we have done the sampling.
+ static void object_alloc_do_sample(Thread *t, oopDesc *o, intx size_in_bytes);
+};
+
+#endif // SHARE_VM_RUNTIME_HEAPMONITORING_HPP