--- old/src/share/vm/opto/macro.cpp 2017-04-18 10:46:07.978567345 -0700 +++ new/src/share/vm/opto/macro.cpp 2017-04-18 10:46:07.870567767 -0700 @@ -1126,6 +1126,75 @@ } } +void PhaseMacroExpand::conditional_sample(Node *should_sample, + BoolTest::mask test, + float probability, + CallLeafNode *call, + Node *thread, + Node **fast_oop_ctrl, + Node **fast_oop_rawmem, + Node **fast_oop, + Node *size_in_bytes, + Node *in_node) { + Node* sample_cmp = new CmpXNode(should_sample, _igvn.MakeConX(0)); + transform_later(sample_cmp); + + Node *sample_bool = new BoolNode(sample_cmp, test); + transform_later(sample_bool); + + IfNode *sample_if = new IfNode(*fast_oop_ctrl, + sample_bool, + probability, + COUNT_UNKNOWN); + transform_later(sample_if); + + // Slow path: taken when (should_sample <test> 0) holds; the sampling call is attached to this projection. + Node *sample_true = new IfTrueNode(sample_if); + transform_later(sample_true); + + // Fast path: no sampling; this projection feeds input 1 of the merge region. + Node *sample_false = new IfFalseNode(sample_if); + transform_later(sample_false); + + // Create postdominators for both the control and data flow paths. + Node *sample_region = new RegionNode(3); + Node *sample_phi_rawmem = new PhiNode(sample_region, + Type::MEMORY, + TypeRawPtr::BOTTOM); + + sample_region->init_req(1, sample_false); + sample_phi_rawmem->init_req(1, *fast_oop_rawmem); + + // Invoke the sampling method on the slow path. + int size = TypeFunc::Parms + 2; // NOTE(review): 'size' is not read again in this function. + + call->init_req(TypeFunc::Parms+0, thread); + call->init_req(TypeFunc::Parms+1, *fast_oop); + call->init_req(TypeFunc::Parms+2, size_in_bytes); +#ifdef _LP64 + // The size is TypeX, so in a 64-bit JVM this is a long, and we need + // a second, dummy argument (an idiosyncrasy of C2). 
+ call->init_req(TypeFunc::Parms+3, C->top()); +#endif + call->init_req( TypeFunc::Control, sample_true); + call->init_req( TypeFunc::I_O , top()); // does no i/o + call->init_req( TypeFunc::Memory , *fast_oop_rawmem ); + call->init_req( TypeFunc::ReturnAdr, in_node->in(TypeFunc::ReturnAdr)); + call->init_req( TypeFunc::FramePtr, in_node->in(TypeFunc::FramePtr)); + transform_later(call); + Node *sample_oop_rawmem = new ProjNode(call, TypeFunc::Memory); + transform_later(sample_oop_rawmem); + + // Tie the slow path to the postdominating node. NOTE(review): region input 2 is the IfTrue projection itself; only a Memory projection of the call is created here, no Control projection. Confirm this is intended. + sample_region->init_req(2, sample_true); + sample_phi_rawmem->init_req(2, sample_oop_rawmem); + transform_later(sample_region); + + *fast_oop_ctrl = sample_region; + *fast_oop_rawmem = sample_phi_rawmem; + transform_later(*fast_oop_rawmem); +} + bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) { // Don't do scalar replacement if the frame can be popped by JVMTI: // if reallocation fails during deoptimization we'll pop all @@ -1636,6 +1705,60 @@ transform_later(fast_oop_rawmem); } + if (HeapMonitor) { + // Inlined version of HeapMonitoring::object_alloc_base + // Get base of thread-local storage area + Node* thread = new ThreadLocalNode(); + transform_later(thread); + + ByteSize sample_offset = JavaThread::bytes_until_sample_offset(); + + // Do test to see if we should sample. + // Get bytes_until_sample from thread local storage. 
+ Node *bytes_until_sample = make_load(fast_oop_ctrl, + fast_oop_rawmem, + thread, + in_bytes(sample_offset), + TypeX_X, + TypeX_X->basic_type()); + + // new_bytes_until_sample = bytes_until_sample - size_in_bytes + Node *new_bytes_until_sample = + new SubXNode(bytes_until_sample, size_in_bytes); + transform_later(new_bytes_until_sample); + + // bytes_until_sample = new_bytes_until_sample; + fast_oop_rawmem = make_store(fast_oop_ctrl, + fast_oop_rawmem, + thread, + in_bytes(sample_offset), + new_bytes_until_sample, + TypeX_X->basic_type()); + + // Leaf call to make when the sampling test below holds. + int size = TypeFunc::Parms + 2; // NOTE(review): 'size' is not read in the visible code. + CallLeafNode *call = new CallLeafNode( + OptoRuntime::heap_object_alloc_Type(), + CAST_FROM_FN_PTR(address, + HeapMonitoring::object_alloc_do_sample), + "object_alloc_do_sample", + TypeRawPtr::BOTTOM); + + // if (new_bytes_until_sample <= 0), i.e. BoolTest::le against the constant 0 + conditional_sample(new_bytes_until_sample, + BoolTest::le, + // Probability + // ~1/10000 + PROB_UNLIKELY_MAG(4), + call, + thread, + &fast_oop_ctrl, + &fast_oop_rawmem, + &fast_oop, + size_in_bytes, + alloc); + } + // Plug in the successful fast-path into the result merge point result_region ->init_req(fast_result_path, fast_oop_ctrl); result_phi_rawoop->init_req(fast_result_path, fast_oop);