--- old/src/share/vm/opto/bytecodeInfo.cpp	2012-12-20 20:22:58.358653958 +0100
+++ new/src/share/vm/opto/bytecodeInfo.cpp	2012-12-20 20:22:58.194253850 +0100
@@ -46,7 +46,8 @@
   _method(callee),
   _site_invoke_ratio(site_invoke_ratio),
   _max_inline_level(max_inline_level),
-  _count_inline_bcs(method()->code_size_for_inlining())
+  _count_inline_bcs(method()->code_size_for_inlining()),
+  _subtrees(c->comp_arena(), 2, 0, NULL)
 {
   NOT_PRODUCT(_count_inlines = 0;)
   if (_caller_jvms != NULL) {
@@ -209,16 +210,18 @@
   if ( callee_method->dont_inline())                        return "don't inline by annotation";
   if ( callee_method->has_unloaded_classes_in_signature())  return "unloaded signature classes";

-  if (callee_method->force_inline() || callee_method->should_inline()) {
+  if (callee_method->should_inline()) {
     // ignore heuristic controls on inlining
     return NULL;
   }

   // Now perform checks which are heuristic

-  if (callee_method->has_compiled_code() &&
-      callee_method->instructions_size() > InlineSmallCode) {
+  if (!callee_method->force_inline()) {
+    if (callee_method->has_compiled_code() &&
+        callee_method->instructions_size() > InlineSmallCode) {
       return "already compiled into a big method";
+    }
   }

   // don't inline exception code unless the top method belongs to an
@@ -277,12 +280,15 @@
 //-----------------------------try_to_inline-----------------------------------
 // return NULL if ok, reason for not inlining otherwise
 // Relocated from "InliningClosure::try_to_inline"
-const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) {
-
+const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay) {
   // Old algorithm had funny accumulating BC-size counters
   if (UseOldInlining && ClipInlining
       && (int)count_inline_bcs() >= DesiredMethodLimit) {
-    return "size > DesiredMethodLimit";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "size > DesiredMethodLimit";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }

   const char *msg = NULL;
@@ -303,8 +309,13 @@
   if (callee_method->code_size() > MaxTrivialSize) {

     // don't inline into giant methods
-    if (C->unique() > (uint)NodeCountInliningCutoff) {
-      return "NodeCountInliningCutoff";
+    if (C->over_inlining_cutoff()) {
+      if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form())
+          || !IncrementalInline) {
+        return "NodeCountInliningCutoff";
+      } else {
+        should_delay = true;
+      }
     }

     if ((!UseInterpreter || CompileTheWorld) &&
@@ -322,8 +333,13 @@
   if (!C->do_inlining() && InlineAccessors) {
     return "not an accessor";
   }
+
   if (inline_level() > _max_inline_level) {
-    return "inlining too deep";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "inlining too deep";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }

   // detect direct and indirect recursive inlining
@@ -348,7 +364,11 @@

   if (UseOldInlining && ClipInlining
       && (int)count_inline_bcs() + size >= DesiredMethodLimit) {
-    return "size > DesiredMethodLimit";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "size > DesiredMethodLimit";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }

   // ok, inline this method
@@ -413,8 +433,9 @@
 }

 //------------------------------ok_to_inline-----------------------------------
-WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
+WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci, bool& should_delay) {
   assert(callee_method != NULL, "caller checks for optimized virtual!");
+  assert(!should_delay, "should be initialized to false");
 #ifdef ASSERT
   // Make sure the incoming jvms has the same information content as me.
   // This means that we can eventually make this whole class AllStatic.
@@ -444,7 +465,7 @@

     // Check if inlining policy says no.
     WarmCallInfo wci = *(initial_wci);
-    failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci);
+    failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci, should_delay);
     if (failure_msg != NULL && C->log() != NULL) {
       C->log()->inline_fail(failure_msg);
     }
--- old/src/share/vm/opto/c2_globals.hpp	2012-12-20 20:22:59.577964018 +0100
+++ new/src/share/vm/opto/c2_globals.hpp	2012-12-20 20:22:59.425621195 +0100
@@ -400,7 +400,7 @@
   develop(intx, WarmCallMaxSize, 999999,                                    \
           "size of the largest inlinable method")                          \
                                                                            \
-  product(intx, MaxNodeLimit, 65000,                                       \
+  product(intx, MaxNodeLimit, 110000,                                      \
           "Maximum number of nodes")                                       \
                                                                            \
   product(intx, NodeLimitFudgeFactor, 1000,                                \
@@ -606,6 +606,16 @@
                                                                            \
   develop(bool, VerifyAliases, false,                                      \
           "perform extra checks on the results of alias analysis")        \
+                                                                           \
+  product(bool, IncrementalInline, true,                                   \
+          "do post parse inlining")                                        \
+                                                                           \
+  develop(bool, AlwaysIncrementalInline, false,                            \
+          "do all inlining incrementally")                                 \
+                                                                           \
+  product(intx, LiveNodeCountInliningCutoff, 20000,                        \
+          "max number of live nodes in a method")                          \
+
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- old/src/share/vm/opto/callGenerator.cpp	2012-12-20 20:23:00.822711594 +0100
+++ new/src/share/vm/opto/callGenerator.cpp	2012-12-20 20:23:00.662843177 +0100
@@ -262,8 +262,11 @@

 // Allow inlining decisions to be delayed
 class LateInlineCallGenerator : public DirectCallGenerator {
+ protected:
   CallGenerator* _inline_cg;

+  virtual bool do_late_inline_check(JVMState* jvms) { return true; }
+
  public:
   LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
     DirectCallGenerator(method, true), _inline_cg(inline_cg) {}
@@ -279,7 +282,9 @@

     // Record that this call site should be revisited once the main
     // parse is finished.
-    Compile::current()->add_late_inline(this);
+    if (!is_mh_late_inline()) {
+      C->add_late_inline(this);
+    }

     // Emit the CallStaticJava and request separate projections so
     // that the late inlining logic can distinguish between fall
@@ -287,15 +292,35 @@
     // as is done for allocations and macro expansion.
     return DirectCallGenerator::generate(jvms);
   }
-};
+  virtual void print_inlining_late(const char* msg) {
+    CallNode* call = call_node();
+    Compile* C = Compile::current();
+    C->print_inlining_insert(this);
+    C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg);
+  }
+
+};

 void LateInlineCallGenerator::do_late_inline() {
   // Can't inline it
+  if (call_node() == NULL || call_node()->outcnt() == 0 || call_node()->in(0) == NULL || call_node()->in(0)->is_top())  return;
+
+  for (int i1 = 0; i1 < method()->arg_size(); i1++) {
+    if (call_node()->in(TypeFunc::Parms + i1)->is_top()) {
+      assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+      return;
+    }
+  }
+
+  if (call_node()->in(TypeFunc::Memory)->is_top()) {
+    assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+    return;
+  }
+
   CallStaticJavaNode* call = call_node();

   // Make a clone of the JVMState that appropriate to use for driving a parse
@@ -324,6 +349,11 @@
     }
   }

+  if (!do_late_inline_check(jvms)) {
+    map->disconnect_inputs(NULL, C);
+    return;
+  }
+
   C->print_inlining_insert(this);

   CompileLog* log = C->log();
@@ -360,6 +390,10 @@
     result = (result_size == 1) ? kit.pop() : kit.pop_pair();
   }

+  C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops());
+  C->env()->notice_inlined_method(_inline_cg->method());
+  C->set_inlining_progress();
+
   kit.replace_call(call, result);
 }

@@ -368,6 +402,83 @@
   return new LateInlineCallGenerator(method, inline_cg);
 }

+class LateInlineMHCallGenerator : public LateInlineCallGenerator {
+  ciMethod* _caller;
+  int _attempt;
+  bool _input_not_const;
+
+  virtual bool do_late_inline_check(JVMState* jvms);
+  virtual bool already_attempted() const { return _attempt > 0; }
+
+ public:
+  LateInlineMHCallGenerator(ciMethod* caller, ciMethod* callee, bool input_not_const) :
+    LateInlineCallGenerator(callee, NULL), _caller(caller), _attempt(0), _input_not_const(input_not_const) {}
+
+  virtual bool is_mh_late_inline() const { return true; }
+
+  virtual JVMState* generate(JVMState* jvms) {
+    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
+    if (_input_not_const) {
+      // inlining won't be possible so no need to enqueue right now.
+      call_node()->_cg = this;
+    } else {
+      Compile::current()->add_late_inline(this);
+    }
+    return new_jvms;
+  }
+
+  virtual void print_inlining_late(const char* msg) {
+    if (!_input_not_const) return;
+    LateInlineCallGenerator::print_inlining_late(msg);
+  }
+};
+
+bool LateInlineMHCallGenerator::do_late_inline_check(JVMState* jvms) {
+
+  CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const);
+
+  if (!_input_not_const) {
+    _attempt++;
+  }
+
+  if (cg != NULL) {
+    assert(!cg->is_late_inline() && cg->is_inline(), "we're doing late inlining");
+    _inline_cg = cg;
+    Compile::current()->dec_number_of_mh_late_inlines();
+    return true;
+  }
+
+  call_node()->_cg = this;
+  return false;
+}
+
+CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const) {
+  Compile::current()->inc_number_of_mh_late_inlines();
+  CallGenerator* cg = new LateInlineMHCallGenerator(caller, callee, input_not_const);
+  return cg;
+}
+
+class LateInlineStringCallGenerator : public LateInlineCallGenerator {
+
+ public:
+  LateInlineStringCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
+    LateInlineCallGenerator(method, inline_cg) {}
+
+  virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
+    C->add_string_late_inline(this);
+
+    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
+    return new_jvms;
+  }
+};
+
+CallGenerator* CallGenerator::for_string_late_inline(ciMethod* method, CallGenerator* inline_cg) {
+  return new LateInlineStringCallGenerator(method, inline_cg);
+}
+
 //---------------------------WarmCallGenerator--------------------------------
 // Internal class which handles initial deferral of inlining decisions.
@@ -586,35 +697,52 @@
 }

-CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) {
   assert(callee->is_method_handle_intrinsic() || callee->is_compiled_lambda_form(), "for_method_handle_call mismatch");
-  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee);
-  if (cg != NULL)
-    return cg;
-  return CallGenerator::for_direct_call(callee);
+  bool input_not_const;
+  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const);
+  Compile* C = Compile::current();
+  if (cg != NULL) {
+    if (!delayed_forbidden && AlwaysIncrementalInline) {
+      return CallGenerator::for_late_inline(callee, cg);
+    } else {
+      return cg;
+    }
+  }
+  int bci = jvms->bci();
+  ciCallProfile profile = caller->call_profile_at_bci(bci);
+  int call_site_count = caller->scale_count(profile.count());
+
+  if (IncrementalInline && call_site_count > 0 &&
+      (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())) {
+    return CallGenerator::for_mh_late_inline(caller, callee, input_not_const);
+  } else {
+    return CallGenerator::for_direct_call(callee);
+  }
 }

-CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) {
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
   Compile* C = kit.C;
   vmIntrinsics::ID iid = callee->intrinsic_id();
+  input_not_const = true;
   switch (iid) {
   case vmIntrinsics::_invokeBasic:
     {
       // Get MethodHandle receiver:
       Node* receiver = kit.argument(0);
       if (receiver->Opcode() == Op_ConP) {
+        input_not_const = false;
         const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
         ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
         guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
         const int vtable_index = Method::invalid_vtable_index;
-        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS);
+        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, true, true);
+        assert (!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
         if (cg != NULL && cg->is_inline())
           return cg;
-      } else {
-        if (PrintInlining)  C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
       }
     }
     break;
@@ -627,6 +755,7 @@
       // Get MemberName argument:
       Node* member_name = kit.argument(callee->arg_size() - 1);
       if (member_name->Opcode() == Op_ConP) {
+        input_not_const = false;
         const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr();
         ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget();
@@ -661,7 +790,8 @@
         }
         const int vtable_index = Method::invalid_vtable_index;
         const bool call_is_virtual = target->is_abstract();  // FIXME workaround
-        CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS);
+        CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS, true, true);
+        assert (!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
         if (cg != NULL && cg->is_inline())
           return cg;
       }
--- old/src/share/vm/opto/callGenerator.hpp	2012-12-20 20:23:02.092872433 +0100
+++ new/src/share/vm/opto/callGenerator.hpp	2012-12-20 20:23:01.930717879 +0100
@@ -68,6 +68,12 @@
   // is_late_inline: supports conversion of call into an inline
   virtual bool is_late_inline() const { return false; }

+  // same but for method handle calls
+  virtual bool is_mh_late_inline() const { return false; }
+
+  // for method handle calls: have we tried inlining the call already?
+  virtual bool already_attempted() const { ShouldNotReachHere(); return false; }
+
   // Replace the call with an inline version of the code
   virtual void do_late_inline() { ShouldNotReachHere(); }

@@ -112,11 +118,13 @@
   static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
   static CallGenerator* for_dynamic_call(ciMethod* m);   // invokedynamic

-  static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee);
-  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee);
+  static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden);
+  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const);

   // How to generate a replace a direct call with an inline version
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
+  static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
+  static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);

   // How to make a call but defer the decision whether to inline or not.
   static CallGenerator* for_warm_call(WarmCallInfo* ci,
@@ -147,6 +155,8 @@
                                       CallGenerator* cg);
   virtual Node* generate_predicate(JVMState* jvms) { return NULL; };

+  virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
+
   static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
     if (PrintInlining)  C->print_inlining(callee, inline_level, bci, msg);
--- old/src/share/vm/opto/callnode.cpp	2012-12-20 20:23:03.334282166 +0100
+++ new/src/share/vm/opto/callnode.cpp	2012-12-20 20:23:03.162850744 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "ci/bcEscapeAnalyzer.hpp"
 #include "compiler/oopMap.hpp"
+#include "opto/callGenerator.hpp"
 #include "opto/callnode.hpp"
 #include "opto/escape.hpp"
 #include "opto/locknode.hpp"
@@ -775,16 +776,36 @@
   // and the exception object may not exist if an exception handler
   // swallows the exception but all the other must exist and be found.
   assert(projs->fallthrough_proj      != NULL, "must be found");
-  assert(projs->fallthrough_catchproj != NULL, "must be found");
-  assert(projs->fallthrough_memproj   != NULL, "must be found");
-  assert(projs->fallthrough_ioproj    != NULL, "must be found");
-  assert(projs->catchall_catchproj    != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_catchproj != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_memproj   != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_ioproj    != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->catchall_catchproj    != NULL, "must be found");
   if (separate_io_proj) {
-    assert(projs->catchall_memproj    != NULL, "must be found");
-    assert(projs->catchall_ioproj     != NULL, "must be found");
+    assert(Compile::current()->inlining_incrementally() || projs->catchall_memproj    != NULL, "must be found");
+    assert(Compile::current()->inlining_incrementally() || projs->catchall_ioproj     != NULL, "must be found");
   }
 }

+Node *CallNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  if (can_reshape && _cg != NULL && _cg->is_mh_late_inline() && !_cg->already_attempted()) {
+    // Check whether this MH handle call becomes a candidate for inlining
+    ciMethod* callee = _cg->method();
+    vmIntrinsics::ID iid = callee->intrinsic_id();
+    if (iid == vmIntrinsics::_invokeBasic) {
+      if (in(TypeFunc::Parms)->Opcode() == Op_ConP) {
+        Compile::current()->prepend_late_inline(_cg);
+        _cg = NULL;
+      }
+    } else {
+      if (in(TypeFunc::Parms + callee->arg_size() - 1)->Opcode() == Op_ConP) {
+        Compile::current()->prepend_late_inline(_cg);
+        _cg = NULL;
+      }
+    }
+  }
+  return SafePointNode::Ideal(phase, can_reshape);
+}
+
 //=============================================================================
 uint CallJavaNode::size_of() const { return sizeof(*this); }
--- old/src/share/vm/opto/callnode.hpp	2012-12-20 20:23:04.650545230 +0100
+++ new/src/share/vm/opto/callnode.hpp	2012-12-20 20:23:04.492135085 +0100
@@ -507,6 +507,7 @@
   Node* exobj;
 };

+class CallGenerator;
 //------------------------------CallNode---------------------------------------
 // Call nodes now subsume the function of debug nodes at callsites, so they
@@ -517,12 +518,14 @@
   const TypeFunc *_tf;        // Function type
   address      _entry_point;  // Address of method being called
   float        _cnt;          // Estimate of number of times called
+  CallGenerator* _cg;         // corresponding CallGenerator for some late inline calls

   CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type)
     : SafePointNode(tf->domain()->cnt(), NULL, adr_type),
       _tf(tf),
       _entry_point(addr),
-      _cnt(COUNT_UNKNOWN)
+      _cnt(COUNT_UNKNOWN),
+      _cg(NULL)
   {
     init_class_id(Class_Call);
   }
@@ -537,6 +540,7 @@
   virtual const Type *bottom_type() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual Node *Identity( PhaseTransform *phase ) { return this; }
   virtual uint        cmp( const Node &n ) const;
   virtual uint        size_of() const = 0;
--- old/src/share/vm/opto/cfgnode.cpp	2012-12-20 20:23:05.909958408 +0100
+++ new/src/share/vm/opto/cfgnode.cpp	2012-12-20 20:23:05.731847448 +0100
@@ -363,6 +363,64 @@
   return true; // The Region node is unreachable - it is dead.
 }

+bool RegionNode::try_clean_mem_phi(PhaseGVN *phase) {
+  // Incremental inlining + PhaseStringOpts sometimes produce:
+  //
+  // cmpP with 1 top input
+  //           |
+  //          If
+  //         /  \
+  //   IfFalse  IfTrue  /- Some Node
+  //         \  /      /    /
+  //        Region    / /-MergeMem
+  //             \---Phi
+  //
+  // It's expected by PhaseStringOpts that the Region goes away and is
+  // replaced by If's control input but because there's still a Phi,
+  // the Region stays in the graph. The top input from the cmpP is
+  // propagated forward and a subgraph that is useful goes away. The
+  // code below replaces the Phi with the MergeMem so that the Region
+  // is simplified.
+
+  Node* phi = NULL;
+  for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+    Node* n = fast_out(i);
+    if (n->is_Phi()) {
+      if (phi == NULL) {
+        phi = n;
+      } else if (phi != n) {
+        return true; // more than one phi
+      }
+    }
+  }
+
+  if (phi->as_Phi()->type() == Type::MEMORY && req() == 3 && in(1) && in(2) && in(1)->in(0) == in(2)->in(0)) {
+    MergeMemNode* m = NULL;
+    for (uint i = 1; i < phi->req(); ++i) {
+      Node *ii = phi->in(i);
+
+      if (ii && ii->is_MergeMem() && in(i)->outcnt() == 1) {
+        m = ii->as_MergeMem();
+
+        bool ok = true;
+        for (uint i = 1; i < phi->req(); ++i) {
+          Node *ii = phi->in(i);
+          if (ii == NULL || ii == m) continue;
+          if (ii != m->base_memory() || (ii->is_MergeMem() && in(i)->outcnt() > 1)) {
+            ok = false;
+            break;
+          }
+        }
+        if (ok) {
+          phase->is_IterGVN()->replace_node(phi, m);
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+
 //------------------------------Ideal------------------------------------------
 // Return a node which is more "ideal" than the current node.  Must preserve
 // the CFG, but we can still strip out dead paths.
@@ -375,6 +433,10 @@
   bool has_phis = false;
   if (can_reshape) {            // Need DU info to check for Phi users
     has_phis = (has_phi() != NULL);       // Cache result
+    if (has_phis) {
+      has_phis = try_clean_mem_phi(phase);
+    }
+
     if (!has_phis) {            // No Phi users?  Nothing merging?
       for (uint i = 1; i < req()-1; i++) {
         Node *if1 = in(i);
--- old/src/share/vm/opto/cfgnode.hpp	2012-12-20 20:23:07.201916284 +0100
+++ new/src/share/vm/opto/cfgnode.hpp	2012-12-20 20:23:07.034150431 +0100
@@ -95,6 +95,7 @@
   virtual Node *Identity( PhaseTransform *phase );
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual const RegMask &out_RegMask() const;
+  bool try_clean_mem_phi(PhaseGVN *phase);
 };

 //------------------------------JProjNode--------------------------------------
--- old/src/share/vm/opto/compile.cpp	2012-12-20 20:23:08.513788200 +0100
+++ new/src/share/vm/opto/compile.cpp	2012-12-20 20:23:08.331780735 +0100
@@ -136,7 +136,7 @@
 void Compile::register_intrinsic(CallGenerator* cg) {
   if (_intrinsics == NULL) {
-    _intrinsics = new GrowableArray<CallGenerator*>(60);
+    _intrinsics = new (comp_arena()) GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
   }
   // This code is stolen from ciObjectFactory::insert.
   // Really, GrowableArray should have methods for
@@ -365,6 +365,22 @@
   }
 }

+void Compile::remove_useless_late_inlines(Unique_Node_List &useful, bool string) {
+  int shift = 0;
+  GrowableArray<CallGenerator*>* inlines = string ? &_string_late_inlines : &_late_inlines;
+  for (int i = 0; i < inlines->length(); i++) {
+    CallGenerator* cg = inlines->at(i);
+    CallNode* call = cg->call_node();
+    if (shift > 0) {
+      inlines->at_put(i-shift, cg);
+    }
+    if (!useful.member(call)) {
+      shift++;
+    }
+  }
+  inlines->trunc_to(inlines->length()-shift);
+}
+
 // Disconnect all useless nodes by disconnecting those at the boundary.
 void Compile::remove_useless_nodes(Unique_Node_List &useful) {
   uint next = 0;
@@ -394,6 +410,9 @@
       remove_macro_node(n);
     }
   }
+  // clean up the late inline lists
+  remove_useless_late_inlines(useful, true);
+  remove_useless_late_inlines(useful, false);
   debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
 }

@@ -611,6 +630,12 @@
                   _printer(IdealGraphPrinter::printer()),
 #endif
                   _congraph(NULL),
+                  _late_inlines(comp_arena(), 2, 0, NULL),
+                  _string_late_inlines(comp_arena(), 2, 0, NULL),
+                  _late_inlines_pos(0),
+                  _number_of_mh_late_inlines(0),
+                  _inlining_progress(false),
+                  _inlining_incrementally(false),
                   _print_inlining_list(NULL),
                   _print_inlining(0) {
   C = this;
@@ -736,30 +761,14 @@
       // This is done by a special, unique RethrowNode bound to root.
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
+
+    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");

-    if (!failing() && has_stringbuilder()) {
-      {
-        // remove useless nodes to make the usage analysis simpler
-        ResourceMark rm;
-        PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
-      }
-
-      {
-        ResourceMark rm;
-        print_method("Before StringOpts", 3);
-        PhaseStringOpts pso(initial_gvn(), &for_igvn);
-        print_method("After StringOpts", 3);
-      }
-
-      // now inline anything that we skipped the first time around
-      while (_late_inlines.length() > 0) {
-        CallGenerator* cg = _late_inlines.pop();
-        cg->do_late_inline();
-        if (failing())  return;
-      }
+    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
+      string_inline(true);
     }
-    assert(_late_inlines.length() == 0, "should have been processed");
-    dump_inlining();
+
+    if (failing())  return;

     print_method("Before RemoveUseless", 3);
@@ -906,6 +915,9 @@
     _dead_node_list(comp_arena()),
     _dead_node_count(0),
     _congraph(NULL),
+    _number_of_mh_late_inlines(0),
+    _inlining_progress(false),
+    _inlining_incrementally(false),
     _print_inlining_list(NULL),
     _print_inlining(0) {
   C = this;
@@ -1760,6 +1772,132 @@
   assert(predicate_count()==0, "should be clean!");
 }

+// StringOpts and late inlining of string methods
+void Compile::string_inline(bool parse_time) {
+  {
+    // remove useless nodes to make the usage analysis simpler
+    ResourceMark rm;
+    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+  }
+
+  {
+    ResourceMark rm;
+    print_method("Before StringOpts", 3);
+    PhaseStringOpts pso(initial_gvn(), for_igvn());
+    print_method("After StringOpts", 3);
+  }
+
+  // now inline anything that we skipped the first time around
+  if (!parse_time) {
+    _late_inlines_pos = _late_inlines.length();
+  }
+
+  while (_string_late_inlines.length() > 0) {
+    CallGenerator* cg = _string_late_inlines.pop();
+
+    cg->do_late_inline();
+
+    if (failing())  return;
+  }
+  _string_late_inlines.trunc_to(0);
+}
+
+void Compile::incremental_inline_one(PhaseIterGVN& igvn) {
+  assert(IncrementalInline, "incremental inlining should be on");
+  PhaseGVN* gvn = initial_gvn();
+
+  clear_inlining_progress();
+  for_igvn()->clear();
+  gvn->update_with(&igvn);
+
+  int i = 0;
+
+  {
+    for (; i < _late_inlines.length() && !inlining_progress(); i++) {
+
+      CallGenerator* cg = _late_inlines.at(i);
+
+      _late_inlines_pos = i+1;
+
+      cg->do_late_inline();
+
+      if (failing())  return;
+    }
+    int j = 0;
+    for (; i < _late_inlines.length(); i++, j++) {
+      _late_inlines.at_put(j, _late_inlines.at(i));
+    }
+    _late_inlines.trunc_to(j);
+
+    {
+      ResourceMark rm;
+      PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
+    }
+  }
+
+  igvn = PhaseIterGVN(gvn);
+}
+
+// Perform incremental inlining until bound on number of live nodes is reached
+void Compile::incremental_inline(PhaseIterGVN& igvn) {
+  PhaseGVN* gvn = initial_gvn();
+
+  set_inlining_incrementally();
+  set_inlining_progress();
+  uint low_live_nodes = 0;
+
+  while(inlining_progress() && _late_inlines.length() > 0) {
+
+    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
+        // PhaseIdealLoop is expensive so we only try it once we are
+        // out of loop and we only try it again if the previous helped
+        // got the number of nodes down significantly
+        PhaseIdealLoop ideal_loop( igvn, false, true );
+        if (failing())  return;
+        low_live_nodes = live_nodes();
+        _major_progress = true;
+      }

+      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+        break;
+      }
+    }
+
+    incremental_inline_one(igvn);
+
+    if (failing())  return;
+
+    igvn.optimize();
+
+    if (failing())  return;
+  }
+
+  assert( igvn._worklist.size() == 0, "should be done with igvn" );
+
+  if (_string_late_inlines.length() > 0) {
+    assert(has_stringbuilder(), "inconsistent");
+    for_igvn()->clear();
+    initial_gvn()->update_with(&igvn);
+
+    string_inline(false);
+
+    if (failing())  return;
+
+    {
+      ResourceMark rm;
+      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+    }
+
+    igvn = PhaseIterGVN(gvn);
+
+    igvn.optimize();
+  }
+
+  clear_inlining_incrementally();
+}
+
 //------------------------------Optimize---------------------------------------
 // Given a graph, optimize it.
 void Compile::Optimize() {
@@ -1782,13 +1920,21 @@
 {
  // Iterative Global Value Numbering, including ideal transforms
  // Initialize IterGVN with types and values from parse-time GVN
+
  PhaseIterGVN igvn(initial_gvn());
+
  {
    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
    igvn.optimize();
  }
-
+
  print_method("Iter GVN 1", 2);
+
+  if (failing())  return;
+
+  incremental_inline(igvn);
+
+  print_method("Incremental Inline", 2);

  if (failing())  return;
@@ -1914,6 +2060,7 @@
 }                             // (End scope of igvn; run destructor if necessary for asserts.)

+  dump_inlining();
   // A method with only infinite loops has no edges entering loops from root
 {
   NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
@@ -3362,6 +3509,28 @@

 void Compile::dump_inlining() {
   if (PrintInlining) {
+    // Print inlining message for candidates that we couldn't inline
+    // for lack of space or non constant receiver
+    for (int i = 0; i < _late_inlines.length(); i++) {
+      CallGenerator* cg = _late_inlines.at(i);
+      cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");
+    }
+    Unique_Node_List useful;
+    useful.push(root());
+    for( uint next = 0; next < useful.size(); ++next ) {
+      Node *n = useful.at(next);
+      if (n->isa_Call() != NULL && n->as_Call()->_cg != NULL && n->as_Call()->_cg->call_node() == n) {
+        CallNode* call = n->as_Call();
+        CallGenerator* cg = call->_cg;
+        cg->print_inlining_late("receiver not constant");
+      }
+      uint max = n->len();
+      for( uint i = 0; i < max; ++i ) {
+        Node *m = n->in(i);
+        if( m == NULL ) continue;
+        useful.push(m);
+      }
+    }
     for (int i = 0; i < _print_inlining_list->length(); i++) {
       tty->print(_print_inlining_list->at(i).ss()->as_string());
     }
--- old/src/share/vm/opto/compile.hpp	2012-12-20 20:23:09.852235112 +0100
+++ new/src/share/vm/opto/compile.hpp	2012-12-20 20:23:09.691881325 +0100
@@ -280,6 +280,8 @@
   int                   _orig_pc_slot_offset_in_bytes;

   int                   _major_progress;        // Count of something big happening
+  bool                  _inlining_progress;     // progress doing incremental inlining?
+  bool                  _inlining_incrementally;// Are we doing incremental inlining (post parse)
   bool                  _has_loops;             // True if the method _may_ have some loops
   bool                  _has_split_ifs;         // True if the method _may_ have some split-if
   bool                  _has_unsafe_access;     // True if the method _may_ produce faults in unsafe loads or stores.
@@ -367,8 +369,13 @@
   Unique_Node_List*     _for_igvn;              // Initial work-list for next round of Iterative GVN
   WarmCallInfo*         _warm_calls;            // Sorted work-list for heat-based inlining.

-  GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
-                                                // main parsing has finished.
+  GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
+                                                // main parsing has finished.
+  GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations
+
+  int                           _late_inlines_pos;    // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
+  uint                          _number_of_mh_late_inlines; // number of method handle late inlining still pending
+
   // Inlining may not happen in parse order which would make
   // PrintInlining output confusing. Keep track of PrintInlining
@@ -491,6 +498,12 @@
   int               fixed_slots() const            { assert(_fixed_slots >= 0, "");         return _fixed_slots; }
   void          set_fixed_slots(int n)             { _fixed_slots = n; }
   int               major_progress() const         { return _major_progress; }
+  void          set_inlining_progress()            { _inlining_progress = true; }
+  void        clear_inlining_progress()            { _inlining_progress = false; }
+  int               inlining_progress() const      { return _inlining_progress; }
+  int               inlining_incrementally() const { return _inlining_incrementally; }
+  void          set_inlining_incrementally()       { _inlining_incrementally = true; }
+  void        clear_inlining_incrementally()       { _inlining_incrementally = false; }
   void          set_major_progress()               { _major_progress++; }
   void        clear_major_progress()               { _major_progress = 0; }
   int               num_loop_opts() const          { return _num_loop_opts; }
@@ -726,10 +739,10 @@
   void rethrow_exceptions(JVMState* jvms);
   void return_values(JVMState* jvms);
   JVMState* build_start_state(StartNode* start, const TypeFunc* tf);
-
+
   // Decide how to build a call.
   // The profile factor is a discount to apply to this site's interp. profile.
-  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true);
+  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true, bool delayed_forbidden = false);
   bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);

   // Report if there were too many traps at a current method and bci.
@@ -765,10 +778,39 @@
   WarmCallInfo* pop_warm_call();

   // Record this CallGenerator for inlining at the end of parsing.
-  void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
+  void add_late_inline(CallGenerator* cg) {
+    _late_inlines.insert_before(_late_inlines_pos, cg);
+    _late_inlines_pos++;
+  }
+
+  void prepend_late_inline(CallGenerator* cg) {
+    _late_inlines.insert_before(0, cg);
+  }
+
+  void add_string_late_inline(CallGenerator* cg) {
+    _string_late_inlines.push(cg);
+  }
+
+  void remove_useless_late_inlines(Unique_Node_List &useful, bool string);

   void dump_inlining();

+  bool over_inlining_cutoff() const {
+    if (!inlining_incrementally()) {
+      return unique() > (uint)NodeCountInliningCutoff;
+    } else {
+      return live_nodes() > (uint)LiveNodeCountInliningCutoff;
+    }
+  }
+
+  void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; }
+  void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !"); _number_of_mh_late_inlines--; }
+  bool has_mh_late_inlines() const { return _number_of_mh_late_inlines > 0; }
+
+  void incremental_inline_one(PhaseIterGVN& igvn);
+  void incremental_inline(PhaseIterGVN& igvn);
+  void string_inline(bool parse_time);
+
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                          { return _cfg; }
   bool              select_24_bit_instr() const    { return _select_24_bit_instr; }
--- old/src/share/vm/opto/doCall.cpp	2012-12-20 20:23:11.121709803 +0100
+++ new/src/share/vm/opto/doCall.cpp	2012-12-20 20:23:10.961337994 +0100
@@ -63,7 +63,7 @@
 CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual,
                                        JVMState* jvms, bool allow_inline,
-                                       float prof_factor, bool allow_intrinsics) {
+                                       float prof_factor, bool allow_intrinsics, bool delayed_forbidden) {
   ciMethod*       caller   = jvms->method();
   int             bci      = jvms->bci();
   Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
@@ -130,7 +130,9 @@
     // MethodHandle.invoke* are native methods which obviously don't
     // have bytecodes and so normal inlining fails.
     if (callee->is_method_handle_intrinsic()) {
-      return CallGenerator::for_method_handle_call(jvms, caller, callee);
+      CallGenerator* cg = CallGenerator::for_method_handle_call(jvms, caller, callee, delayed_forbidden);
+      assert (cg == NULL || !delayed_forbidden || !cg->is_late_inline() || cg->is_mh_late_inline(), "unexpected CallGenerator");
+      return cg;
     }

     // Do not inline strict fp into non-strict code, or the reverse
@@ -161,20 +163,27 @@
       WarmCallInfo scratch_ci;
       if (!UseOldInlining)
         scratch_ci.init(jvms, callee, profile, prof_factor);
-      WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci);
+      bool should_delay = false;
+      WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci, should_delay);
       assert(ci != &scratch_ci, "do not let this pointer escape");
       bool allow_inline   = (ci != NULL && !ci->is_cold());
       bool require_inline = (allow_inline && ci->is_hot());

       if (allow_inline) {
         CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
-        if (require_inline && cg != NULL && should_delay_inlining(callee, jvms)) {
+
+        if (require_inline && cg != NULL) {
           // Delay the inlining of this method to give us the
           // opportunity to perform some high level optimizations
           // first.
-          return CallGenerator::for_late_inline(callee, cg);
+          if (should_delay_inlining(callee, jvms)) {
+            assert(!delayed_forbidden, "strange");
+            return CallGenerator::for_string_late_inline(callee, cg);
+          } else if ((should_delay || AlwaysIncrementalInline) && !delayed_forbidden) {
+            return CallGenerator::for_late_inline(callee, cg);
+          }
         }
-        if (cg == NULL) {
+        if (cg == NULL || should_delay) {
           // Fall through.
         } else if (require_inline || !InlineWarmCalls) {
           return cg;
--- old/src/share/vm/opto/graphKit.cpp	2012-12-20 20:23:12.479869061 +0100
+++ new/src/share/vm/opto/graphKit.cpp	2012-12-20 20:23:12.287392574 +0100
@@ -1794,10 +1794,15 @@

   if (ejvms == NULL) {
     // No exception edges to simply kill off those paths
-    C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
-    C->gvn_replace_by(callprojs.catchall_memproj,   C->top());
-    C->gvn_replace_by(callprojs.catchall_ioproj,    C->top());
-
+    if (callprojs.catchall_catchproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
+    }
+    if (callprojs.catchall_memproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_memproj, C->top());
+    }
+    if (callprojs.catchall_ioproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_ioproj, C->top());
+    }
     // Replace the old exception object with top
     if (callprojs.exobj != NULL) {
       C->gvn_replace_by(callprojs.exobj, C->top());
@@ -1809,10 +1814,15 @@
     SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states();
     Node* ex_oop = ekit.use_exception_state(ex_map);
-
-    C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
-    C->gvn_replace_by(callprojs.catchall_memproj,   ekit.reset_memory());
-    C->gvn_replace_by(callprojs.catchall_ioproj,    ekit.i_o());
+    if (callprojs.catchall_catchproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
+    }
+    if (callprojs.catchall_memproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory());
+    }
+    if (callprojs.catchall_ioproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o());
+    }

     // Replace the old exception object with the newly created one
     if (callprojs.exobj != NULL) {
--- old/src/share/vm/opto/parse.hpp	2012-12-20 20:23:13.943286270 +0100
+++ new/src/share/vm/opto/parse.hpp	2012-12-20 20:23:13.784279949 +0100
@@ -70,7 +70,7 @@
   InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
                                            JVMState* caller_jvms,
                                            int caller_bci);
-  const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
+  const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay);
   const char* should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
   const char* should_not_inline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const;
   void        print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const;
@@ -107,7 +107,7 @@
   // and may be accessed by find_subtree_from_root.
   // The call_method is the dest_method for a special or static invocation.
   // The call_method is an optimized virtual method candidate otherwise.
-  WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci);
+  WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci, bool& should_delay);

   // Information about inlined method
   JVMState* caller_jvms() const { return _caller_jvms; }
--- old/src/share/vm/opto/phaseX.cpp	2012-12-20 20:23:15.198938367 +0100
+++ new/src/share/vm/opto/phaseX.cpp	2012-12-20 20:23:15.035294745 +0100
@@ -75,6 +75,13 @@
   // nh->_sentinel must be in the current node space
 }

+void NodeHash::update_with(NodeHash *nh) {
+  debug_only(_table = (Node**)badAddress);   // interact correctly w/ operator=
+  // just copy in all the fields
+  *this = *nh;
+  // nh->_sentinel must be in the current node space
+}
+
 //------------------------------hash_find--------------------------------------
 // Find in hash table
 Node *NodeHash::hash_find( const Node *n ) {
--- old/src/share/vm/opto/phaseX.hpp	2012-12-20 20:23:16.527212924 +0100
+++ new/src/share/vm/opto/phaseX.hpp	2012-12-20 20:23:16.363551493 +0100
@@ -92,6 +92,7 @@
   }

   void   remove_useless_nodes(VectorSet &useful); // replace with sentinel
+  void   update_with(NodeHash* nh);

   Node  *sentinel() { return _sentinel; }

@@ -386,6 +387,11 @@
   Node  *transform( Node *n );
   Node  *transform_no_reclaim( Node *n );

+  void update_with(PhaseGVN* gvn) {
+    _table.update_with(&gvn->_table);
+    _types = gvn->_types;
+  }
+
   // Check for a simple dead loop when a data node references itself.
   DEBUG_ONLY(void dead_loop_check(Node *n);)
 };
--- old/src/share/vm/opto/stringopts.cpp	2012-12-20 20:23:17.752326206 +0100
+++ new/src/share/vm/opto/stringopts.cpp	2012-12-20 20:23:17.580142088 +0100
@@ -265,7 +265,8 @@
       } else if (n->is_IfTrue()) {
         Compile* C = _stringopts->C;
         C->gvn_replace_by(n, n->in(0)->in(0));
-        C->gvn_replace_by(n->in(0), C->top());
+        // get rid of the other projection
+        C->gvn_replace_by(n->in(0)->as_If()->proj_out(false), C->top());
       }
     }
   }
@@ -439,7 +440,7 @@
   }
   // Find the constructor call
   Node* result = alloc->result_cast();
-  if (result == NULL || !result->is_CheckCastPP()) {
+  if (result == NULL || !result->is_CheckCastPP() || alloc->in(TypeFunc::Memory)->is_top()) {
     // strange looking allocation
 #ifndef PRODUCT
     if (PrintOptimizeStringConcat) {
@@ -834,6 +835,9 @@
         ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) {
       // Simple diamond.
       // XXX should check for possibly merging stores.  simple data merges are ok.
+      // The IGVN will make this simple diamond go away when it
+      // transforms the Region. Make sure it sees it.
+      Compile::current()->record_for_igvn(ptr);
       ptr = ptr->in(1)->in(0)->in(0);
       continue;
     }
--- old/src/share/vm/runtime/arguments.cpp	2012-12-20 20:23:19.099084374 +0100
+++ new/src/share/vm/runtime/arguments.cpp	2012-12-20 20:23:18.918415845 +0100
@@ -3283,6 +3283,13 @@
   if (!EliminateLocks) {
     EliminateNestedLocks = false;
   }
+  if (!Inline) {
+    IncrementalInline = false;
+  }
+  if (!IncrementalInline && FLAG_IS_DEFAULT(MaxNodeLimit)) {
+    // no incremental inlining: use a safe MaxNodeLimit
+    FLAG_SET_DEFAULT(MaxNodeLimit, (intx)65000);
+  }
 #endif
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
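
Usage sketch (not part of the diff): the c2_globals.hpp hunk above defines the new flags, and the lines below only illustrate how they might be exercised on a build that includes this change; the java invocation itself is hypothetical.

    # product flags added by the patch; the values shown are their defaults
    java -XX:+IncrementalInline -XX:LiveNodeCountInliningCutoff=20000 -version

    # develop flag, only settable on a debug/fastdebug build
    java -XX:+AlwaysIncrementalInline -version

Per the arguments.cpp hunk, -XX:-Inline also turns IncrementalInline off, and turning incremental inlining off while MaxNodeLimit is still at its default makes the VM fall back to the previous MaxNodeLimit of 65000.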