# HG changeset patch # User redestad # Date 1525685920 -7200 # Mon May 07 11:38:40 2018 +0200 # Node ID 2a16cbf37f398408fff3b5f88ba27adec9be017a # Parent bd0a95bec96bfd0891569588a8ad7992ad8d0959 8202711: Merge tiered compilation policies Reviewed-by: neliasso diff --git a/src/hotspot/share/runtime/advancedThresholdPolicy.cpp b/src/hotspot/share/runtime/advancedThresholdPolicy.cpp deleted file mode 100644 --- a/src/hotspot/share/runtime/advancedThresholdPolicy.cpp +++ /dev/null @@ -1,667 +0,0 @@ -/* - * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "code/codeCache.hpp" -#include "runtime/advancedThresholdPolicy.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/simpleThresholdPolicy.inline.hpp" -#if INCLUDE_JVMCI -#include "jvmci/jvmciRuntime.hpp" -#endif - -#ifdef TIERED -// Print an event. 
-void AdvancedThresholdPolicy::print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, - int bci, CompLevel level) { - tty->print(" rate="); - if (mh->prev_time() == 0) tty->print("n/a"); - else tty->print("%f", mh->rate()); - - tty->print(" k=%.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback), - threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback)); - -} - -void AdvancedThresholdPolicy::initialize() { - int count = CICompilerCount; -#ifdef _LP64 - // Turn on ergonomic compiler count selection - if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) { - FLAG_SET_DEFAULT(CICompilerCountPerCPU, true); - } - if (CICompilerCountPerCPU) { - // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n - int log_cpu = log2_intptr(os::active_processor_count()); - int loglog_cpu = log2_intptr(MAX2(log_cpu, 1)); - count = MAX2(log_cpu * loglog_cpu * 3 / 2, 2); - FLAG_SET_ERGO(intx, CICompilerCount, count); - } -#else - // On 32-bit systems, the number of compiler threads is limited to 3. - // On these systems, the virtual address space available to the JVM - // is usually limited to 2-4 GB (the exact value depends on the platform). - // As the compilers (especially C2) can consume a large amount of - // memory, scaling the number of compiler threads with the number of - // available cores can result in the exhaustion of the address space - /// available to the VM and thus cause the VM to crash. 
- if (FLAG_IS_DEFAULT(CICompilerCount)) { - count = 3; - FLAG_SET_ERGO(intx, CICompilerCount, count); - } -#endif - - if (TieredStopAtLevel < CompLevel_full_optimization) { - // No C2 compiler thread required - set_c1_count(count); - } else { - set_c1_count(MAX2(count / 3, 1)); - set_c2_count(MAX2(count - c1_count(), 1)); - } - assert(count == c1_count() + c2_count(), "inconsistent compiler thread count"); - - // Some inlining tuning -#ifdef X86 - if (FLAG_IS_DEFAULT(InlineSmallCode)) { - FLAG_SET_DEFAULT(InlineSmallCode, 2000); - } -#endif - -#if defined SPARC || defined AARCH64 - if (FLAG_IS_DEFAULT(InlineSmallCode)) { - FLAG_SET_DEFAULT(InlineSmallCode, 2500); - } -#endif - - set_increase_threshold_at_ratio(); - set_start_time(os::javaTimeMillis()); -} - -// update_rate() is called from select_task() while holding a compile queue lock. -void AdvancedThresholdPolicy::update_rate(jlong t, Method* m) { - // Skip update if counters are absent. - // Can't allocate them since we are holding compile queue lock. - if (m->method_counters() == NULL) return; - - if (is_old(m)) { - // We don't remove old methods from the queue, - // so we can just zero the rate. - m->set_rate(0); - return; - } - - // We don't update the rate if we've just came out of a safepoint. - // delta_s is the time since last safepoint in milliseconds. - jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); - jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement - // How many events were there since the last time? - int event_count = m->invocation_count() + m->backedge_count(); - int delta_e = event_count - m->prev_event_count(); - - // We should be running for at least 1ms. - if (delta_s >= TieredRateUpdateMinTime) { - // And we must've taken the previous point at least 1ms before. 
- if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) { - m->set_prev_time(t); - m->set_prev_event_count(event_count); - m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond - } else { - if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) { - // If nothing happened for 25ms, zero the rate. Don't modify prev values. - m->set_rate(0); - } - } - } -} - -// Check if this method has been stale from a given number of milliseconds. -// See select_task(). -bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, Method* m) { - jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); - jlong delta_t = t - m->prev_time(); - if (delta_t > timeout && delta_s > timeout) { - int event_count = m->invocation_count() + m->backedge_count(); - int delta_e = event_count - m->prev_event_count(); - // Return true if there were no events. - return delta_e == 0; - } - return false; -} - -// We don't remove old methods from the compile queue even if they have -// very low activity. See select_task(). -bool AdvancedThresholdPolicy::is_old(Method* method) { - return method->invocation_count() > 50000 || method->backedge_count() > 500000; -} - -double AdvancedThresholdPolicy::weight(Method* method) { - return (double)(method->rate() + 1) * - (method->invocation_count() + 1) * (method->backedge_count() + 1); -} - -// Apply heuristics and return true if x should be compiled before y -bool AdvancedThresholdPolicy::compare_methods(Method* x, Method* y) { - if (x->highest_comp_level() > y->highest_comp_level()) { - // recompilation after deopt - return true; - } else - if (x->highest_comp_level() == y->highest_comp_level()) { - if (weight(x) > weight(y)) { - return true; - } - } - return false; -} - -// Is method profiled enough? 
-bool AdvancedThresholdPolicy::is_method_profiled(Method* method) { - MethodData* mdo = method->method_data(); - if (mdo != NULL) { - int i = mdo->invocation_count_delta(); - int b = mdo->backedge_count_delta(); - return call_predicate_helper(i, b, 1, method); - } - return false; -} - -// Called with the queue locked and with at least one element -CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) { - CompileTask *max_blocking_task = NULL; - CompileTask *max_task = NULL; - Method* max_method = NULL; - jlong t = os::javaTimeMillis(); - // Iterate through the queue and find a method with a maximum rate. - for (CompileTask* task = compile_queue->first(); task != NULL;) { - CompileTask* next_task = task->next(); - Method* method = task->method(); - update_rate(t, method); - if (max_task == NULL) { - max_task = task; - max_method = method; - } else { - // If a method has been stale for some time, remove it from the queue. - // Blocking tasks and tasks submitted from whitebox API don't become stale - if (task->can_become_stale() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { - if (PrintTieredEvents) { - print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level()); - } - compile_queue->remove_and_mark_stale(task); - method->clear_queued_for_compilation(); - task = next_task; - continue; - } - - // Select a method with a higher rate - if (compare_methods(method, max_method)) { - max_task = task; - max_method = method; - } - } - - if (task->is_blocking()) { - if (max_blocking_task == NULL || compare_methods(method, max_blocking_task->method())) { - max_blocking_task = task; - } - } - - task = next_task; - } - - if (max_blocking_task != NULL) { - // In blocking compilation mode, the CompileBroker will make - // compilations submitted by a JVMCI compiler thread non-blocking. 
These - // compilations should be scheduled after all blocking compilations - // to service non-compiler related compilations sooner and reduce the - // chance of such compilations timing out. - max_task = max_blocking_task; - max_method = max_task->method(); - } - - if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile - && is_method_profiled(max_method)) { - max_task->set_comp_level(CompLevel_limited_profile); - if (PrintTieredEvents) { - print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level()); - } - } - - return max_task; -} - -double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) { - double queue_size = CompileBroker::queue_size(level); - int comp_count = compiler_count(level); - double k = queue_size / (feedback_k * comp_count) + 1; - - // Increase C1 compile threshold when the code cache is filled more - // than specified by IncreaseFirstTierCompileThresholdAt percentage. - // The main intention is to keep enough free space for C2 compiled code - // to achieve peak performance if the code cache is under stress. - if ((TieredStopAtLevel == CompLevel_full_optimization) && (level != CompLevel_full_optimization)) { - double current_reverse_free_ratio = CodeCache::reverse_free_ratio(CodeCache::get_code_blob_type(level)); - if (current_reverse_free_ratio > _increase_threshold_at_ratio) { - k *= exp(current_reverse_free_ratio - _increase_threshold_at_ratio); - } - } - return k; -} - -// Call and loop predicates determine whether a transition to a higher -// compilation level should be performed (pointers to predicate functions -// are passed to common()). -// Tier?LoadFeedback is basically a coefficient that determines of -// how many methods per compiler thread can be in the queue before -// the threshold values double. 
-bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level, Method* method) { - switch(cur_level) { - case CompLevel_aot: { - double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); - return loop_predicate_helper(i, b, k, method); - } - case CompLevel_none: - case CompLevel_limited_profile: { - double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); - return loop_predicate_helper(i, b, k, method); - } - case CompLevel_full_profile: { - double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); - return loop_predicate_helper(i, b, k, method); - } - default: - return true; - } -} - -bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level, Method* method) { - switch(cur_level) { - case CompLevel_aot: { - double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); - return call_predicate_helper(i, b, k, method); - } - case CompLevel_none: - case CompLevel_limited_profile: { - double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); - return call_predicate_helper(i, b, k, method); - } - case CompLevel_full_profile: { - double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); - return call_predicate_helper(i, b, k, method); - } - default: - return true; - } -} - -// If a method is old enough and is still in the interpreter we would want to -// start profiling without waiting for the compiled method to arrive. -// We also take the load on compilers into the account. 
-bool AdvancedThresholdPolicy::should_create_mdo(Method* method, CompLevel cur_level) { - if (cur_level == CompLevel_none && - CompileBroker::queue_size(CompLevel_full_optimization) <= - Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { - int i = method->invocation_count(); - int b = method->backedge_count(); - double k = Tier0ProfilingStartPercentage / 100.0; - return call_predicate_helper(i, b, k, method) || loop_predicate_helper(i, b, k, method); - } - return false; -} - -// Inlining control: if we're compiling a profiled method with C1 and the callee -// is known to have OSRed in a C2 version, don't inline it. -bool AdvancedThresholdPolicy::should_not_inline(ciEnv* env, ciMethod* callee) { - CompLevel comp_level = (CompLevel)env->comp_level(); - if (comp_level == CompLevel_full_profile || - comp_level == CompLevel_limited_profile) { - return callee->highest_osr_comp_level() == CompLevel_full_optimization; - } - return false; -} - -// Create MDO if necessary. -void AdvancedThresholdPolicy::create_mdo(const methodHandle& mh, JavaThread* THREAD) { - if (mh->is_native() || - mh->is_abstract() || - mh->is_accessor() || - mh->is_constant_getter()) { - return; - } - if (mh->method_data() == NULL) { - Method::build_interpreter_method_data(mh, CHECK_AND_CLEAR); - } -} - - -/* - * Method states: - * 0 - interpreter (CompLevel_none) - * 1 - pure C1 (CompLevel_simple) - * 2 - C1 with invocation and backedge counting (CompLevel_limited_profile) - * 3 - C1 with full profiling (CompLevel_full_profile) - * 4 - C2 (CompLevel_full_optimization) - * - * Common state transition patterns: - * a. 0 -> 3 -> 4. - * The most common path. But note that even in this straightforward case - * profiling can start at level 0 and finish at level 3. - * - * b. 0 -> 2 -> 3 -> 4. - * This case occurs when the load on C2 is deemed too high. 
So, instead of transitioning - * into state 3 directly and over-profiling while a method is in the C2 queue we transition to - * level 2 and wait until the load on C2 decreases. This path is disabled for OSRs. - * - * c. 0 -> (3->2) -> 4. - * In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough - * to enable the profiling to fully occur at level 0. In this case we change the compilation level - * of the method to 2 while the request is still in-queue, because it'll allow it to run much faster - * without full profiling while c2 is compiling. - * - * d. 0 -> 3 -> 1 or 0 -> 2 -> 1. - * After a method was once compiled with C1 it can be identified as trivial and be compiled to - * level 1. These transition can also occur if a method can't be compiled with C2 but can with C1. - * - * e. 0 -> 4. - * This can happen if a method fails C1 compilation (it will still be profiled in the interpreter) - * or because of a deopt that didn't require reprofiling (compilation won't happen in this case because - * the compiled version already exists). - * - * Note that since state 0 can be reached from any other state via deoptimization different loops - * are possible. - * - */ - -// Common transition function. Given a predicate determines if a method should transition to another level. -CompLevel AdvancedThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback) { - CompLevel next_level = cur_level; - int i = method->invocation_count(); - int b = method->backedge_count(); - - if (is_trivial(method)) { - next_level = CompLevel_simple; - } else { - switch(cur_level) { - default: break; - case CompLevel_aot: { - // If we were at full profile level, would we switch to full opt? 
- if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) { - next_level = CompLevel_full_optimization; - } else if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= - Tier3DelayOff * compiler_count(CompLevel_full_optimization) && - (this->*p)(i, b, cur_level, method))) { - next_level = CompLevel_full_profile; - } - } - break; - case CompLevel_none: - // If we were at full profile level, would we switch to full opt? - if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) { - next_level = CompLevel_full_optimization; - } else if ((this->*p)(i, b, cur_level, method)) { -#if INCLUDE_JVMCI - if (EnableJVMCI && UseJVMCICompiler) { - // Since JVMCI takes a while to warm up, its queue inevitably backs up during - // early VM execution. As of 2014-06-13, JVMCI's inliner assumes that the root - // compilation method and all potential inlinees have mature profiles (which - // includes type profiling). If it sees immature profiles, JVMCI's inliner - // can perform pathologically bad (e.g., causing OutOfMemoryErrors due to - // exploring/inlining too many graphs). Since a rewrite of the inliner is - // in progress, we simply disable the dialing back heuristic for now and will - // revisit this decision once the new inliner is completed. - next_level = CompLevel_full_profile; - } else -#endif - { - // C1-generated fully profiled code is about 30% slower than the limited profile - // code that has only invocation and backedge counters. The observation is that - // if C2 queue is large enough we can spend too much time in the fully profiled code - // while waiting for C2 to pick the method from the queue. To alleviate this problem - // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long - // we choose to compile a limited profiled version and then recompile with full profiling - // when the load on C2 goes down. 
- if (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) > - Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { - next_level = CompLevel_limited_profile; - } else { - next_level = CompLevel_full_profile; - } - } - } - break; - case CompLevel_limited_profile: - if (is_method_profiled(method)) { - // Special case: we got here because this method was fully profiled in the interpreter. - next_level = CompLevel_full_optimization; - } else { - MethodData* mdo = method->method_data(); - if (mdo != NULL) { - if (mdo->would_profile()) { - if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= - Tier3DelayOff * compiler_count(CompLevel_full_optimization) && - (this->*p)(i, b, cur_level, method))) { - next_level = CompLevel_full_profile; - } - } else { - next_level = CompLevel_full_optimization; - } - } else { - // If there is no MDO we need to profile - if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= - Tier3DelayOff * compiler_count(CompLevel_full_optimization) && - (this->*p)(i, b, cur_level, method))) { - next_level = CompLevel_full_profile; - } - } - } - break; - case CompLevel_full_profile: - { - MethodData* mdo = method->method_data(); - if (mdo != NULL) { - if (mdo->would_profile()) { - int mdo_i = mdo->invocation_count_delta(); - int mdo_b = mdo->backedge_count_delta(); - if ((this->*p)(mdo_i, mdo_b, cur_level, method)) { - next_level = CompLevel_full_optimization; - } - } else { - next_level = CompLevel_full_optimization; - } - } - } - break; - } - } - return MIN2(next_level, (CompLevel)TieredStopAtLevel); -} - -// Determine if a method should be compiled with a normal entry point at a different level. 
-CompLevel AdvancedThresholdPolicy::call_event(Method* method, CompLevel cur_level, JavaThread * thread) { - CompLevel osr_level = MIN2((CompLevel) method->highest_osr_comp_level(), - common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level, true)); - CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level); - - // If OSR method level is greater than the regular method level, the levels should be - // equalized by raising the regular method level in order to avoid OSRs during each - // invocation of the method. - if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) { - MethodData* mdo = method->method_data(); - guarantee(mdo != NULL, "MDO should not be NULL"); - if (mdo->invocation_count() >= 1) { - next_level = CompLevel_full_optimization; - } - } else { - next_level = MAX2(osr_level, next_level); - } -#if INCLUDE_JVMCI - if (UseJVMCICompiler) { - next_level = JVMCIRuntime::adjust_comp_level(method, false, next_level, thread); - } -#endif - return next_level; -} - -// Determine if we should do an OSR compilation of a given method. -CompLevel AdvancedThresholdPolicy::loop_event(Method* method, CompLevel cur_level, JavaThread * thread) { - CompLevel next_level = common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level, true); - if (cur_level == CompLevel_none) { - // If there is a live OSR method that means that we deopted to the interpreter - // for the transition. - CompLevel osr_level = MIN2((CompLevel)method->highest_osr_comp_level(), next_level); - if (osr_level > CompLevel_none) { - return osr_level; - } - } -#if INCLUDE_JVMCI - if (UseJVMCICompiler) { - next_level = JVMCIRuntime::adjust_comp_level(method, true, next_level, thread); - } -#endif - return next_level; -} - -// Update the rate and submit compile -void AdvancedThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) { - int hot_count = (bci == InvocationEntryBci) ? 
mh->invocation_count() : mh->backedge_count(); - update_rate(os::javaTimeMillis(), mh()); - CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread); -} - -bool AdvancedThresholdPolicy::maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread) { - if (UseAOT && !delay_compilation_during_startup()) { - if (cur_level == CompLevel_full_profile || cur_level == CompLevel_none) { - // If the current level is full profile or interpreter and we're switching to any other level, - // activate the AOT code back first so that we won't waste time overprofiling. - compile(mh, InvocationEntryBci, CompLevel_aot, thread); - // Fall through for JIT compilation. - } - if (next_level == CompLevel_limited_profile && cur_level != CompLevel_aot && mh->has_aot_code()) { - // If the next level is limited profile, use the aot code (if there is any), - // since it's essentially the same thing. - compile(mh, InvocationEntryBci, CompLevel_aot, thread); - // Not need to JIT, we're done. - return true; - } - } - return false; -} - - -// Handle the invocation event. -void AdvancedThresholdPolicy::method_invocation_event(const methodHandle& mh, const methodHandle& imh, - CompLevel level, CompiledMethod* nm, JavaThread* thread) { - if (should_create_mdo(mh(), level)) { - create_mdo(mh, thread); - } - CompLevel next_level = call_event(mh(), level, thread); - if (next_level != level) { - if (maybe_switch_to_aot(mh, level, next_level, thread)) { - // No JITting necessary - return; - } - if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) { - compile(mh, InvocationEntryBci, next_level, thread); - } - } -} - -// Handle the back branch event. Notice that we can compile the method -// with a regular entry from here. 
-void AdvancedThresholdPolicy::method_back_branch_event(const methodHandle& mh, const methodHandle& imh, - int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread) { - if (should_create_mdo(mh(), level)) { - create_mdo(mh, thread); - } - // Check if MDO should be created for the inlined method - if (should_create_mdo(imh(), level)) { - create_mdo(imh, thread); - } - - if (is_compilation_enabled()) { - CompLevel next_osr_level = loop_event(imh(), level, thread); - CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level(); - // At the very least compile the OSR version - if (!CompileBroker::compilation_is_in_queue(imh) && (next_osr_level != level)) { - compile(imh, bci, next_osr_level, thread); - } - - // Use loop event as an opportunity to also check if there's been - // enough calls. - CompLevel cur_level, next_level; - if (mh() != imh()) { // If there is an enclosing method - if (level == CompLevel_aot) { - // Recompile the enclosing method to prevent infinite OSRs. Stay at AOT level while it's compiling. 
- if (max_osr_level != CompLevel_none && !CompileBroker::compilation_is_in_queue(mh)) { - compile(mh, InvocationEntryBci, MIN2((CompLevel)TieredStopAtLevel, CompLevel_full_profile), thread); - } - } else { - // Current loop event level is not AOT - guarantee(nm != NULL, "Should have nmethod here"); - cur_level = comp_level(mh()); - next_level = call_event(mh(), cur_level, thread); - - if (max_osr_level == CompLevel_full_optimization) { - // The inlinee OSRed to full opt, we need to modify the enclosing method to avoid deopts - bool make_not_entrant = false; - if (nm->is_osr_method()) { - // This is an osr method, just make it not entrant and recompile later if needed - make_not_entrant = true; - } else { - if (next_level != CompLevel_full_optimization) { - // next_level is not full opt, so we need to recompile the - // enclosing method without the inlinee - cur_level = CompLevel_none; - make_not_entrant = true; - } - } - if (make_not_entrant) { - if (PrintTieredEvents) { - int osr_bci = nm->is_osr_method() ? 
nm->osr_entry_bci() : InvocationEntryBci; - print_event(MAKE_NOT_ENTRANT, mh(), mh(), osr_bci, level); - } - nm->make_not_entrant(); - } - } - // Fix up next_level if necessary to avoid deopts - if (next_level == CompLevel_limited_profile && max_osr_level == CompLevel_full_profile) { - next_level = CompLevel_full_profile; - } - if (cur_level != next_level) { - if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) { - compile(mh, InvocationEntryBci, next_level, thread); - } - } - } - } else { - cur_level = comp_level(mh()); - next_level = call_event(mh(), cur_level, thread); - if (next_level != cur_level) { - if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) { - compile(mh, InvocationEntryBci, next_level, thread); - } - } - } - } -} - -#endif // TIERED diff --git a/src/hotspot/share/runtime/advancedThresholdPolicy.hpp b/src/hotspot/share/runtime/advancedThresholdPolicy.hpp deleted file mode 100644 --- a/src/hotspot/share/runtime/advancedThresholdPolicy.hpp +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP -#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP - -#include "runtime/simpleThresholdPolicy.hpp" - -#ifdef TIERED -class CompileTask; -class CompileQueue; - -/* - * The system supports 5 execution levels: - * * level 0 - interpreter - * * level 1 - C1 with full optimization (no profiling) - * * level 2 - C1 with invocation and backedge counters - * * level 3 - C1 with full profiling (level 2 + MDO) - * * level 4 - C2 - * - * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters - * (invocation counters and backedge counters). The frequency of these notifications is - * different at each level. These notifications are used by the policy to decide what transition - * to make. - * - * Execution starts at level 0 (interpreter), then the policy can decide either to compile the - * method at level 3 or level 2. The decision is based on the following factors: - * 1. The length of the C2 queue determines the next level. The observation is that level 2 - * is generally faster than level 3 by about 30%, therefore we would want to minimize the time - * a method spends at level 3. We should only spend the time at level 3 that is necessary to get - * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to - * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile - * request makes its way through the long queue. 
When the load on C2 recedes we are going to - * recompile at level 3 and start gathering profiling information. - * 2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce - * additional filtering if the compiler is overloaded. The rationale is that by the time a - * method gets compiled it can become unused, so it doesn't make sense to put too much onto the - * queue. - * - * After profiling is completed at level 3 the transition is made to level 4. Again, the length - * of the C2 queue is used as a feedback to adjust the thresholds. - * - * After the first C1 compile some basic information is determined about the code like the number - * of the blocks and the number of the loops. Based on that it can be decided that a method - * is trivial and compiling it with C1 will yield the same code. In this case the method is - * compiled at level 1 instead of 4. - * - * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of - * the code and the C2 queue is sufficiently small we can decide to start profiling in the - * interpreter (and continue profiling in the compiled code once the level 3 version arrives). - * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2 - * version is compiled instead in order to run faster waiting for a level 4 version. - * - * Compile queues are implemented as priority queues - for each method in the queue we compute - * the event rate (the number of invocation and backedge counter increments per unit of time). - * When getting an element off the queue we pick the one with the largest rate. Maintaining the - * rate also allows us to remove stale methods (the ones that got on the queue but stopped - * being used shortly after that). -*/ - -/* Command line options: - * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method - * invocation and backedge notifications. 
Basically every n-th invocation or backedge a mutator thread - * makes a call into the runtime. - * - * - Tier?InvocationThreshold, Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control - * compilation thresholds. - * Level 2 thresholds are not used and are provided for option-compatibility and potential future use. - * Other thresholds work as follows: - * - * Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when - * the following predicate is true (X is the level): - * - * i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s && i + b > TierXCompileThreshold * s), - * - * where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling - * coefficient that will be discussed further. - * The intuition is to equalize the time that is spend profiling each method. - * The same predicate is used to control the transition from level 3 to level 4 (C2). It should be - * noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come - * from Method* and for 3->4 transition they come from MDO (since profiled invocations are - * counted separately). Finally, if a method does not contain anything worth profiling, a transition - * from level 3 to level 4 occurs without considering thresholds (e.g., with fewer invocations than - * what is specified by Tier4InvocationThreshold). - * - * OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates. - * - * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending - * on the compiler load. The scaling coefficients are computed as follows: - * - * s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1, - * - * where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X - * is the number of level X compiler threads. 
- * - * Basically these parameters describe how many methods should be in the compile queue - * per compiler thread before the scaling coefficient increases by one. - * - * This feedback provides the mechanism to automatically control the flow of compilation requests - * depending on the machine speed, mutator load and other external factors. - * - * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop. - * Consider the following observation: a method compiled with full profiling (level 3) - * is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO). - * Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue - * gets congested and the 3->4 transition is delayed. While the method is the C2 queue it continues - * executing at level 3 for much longer time than is required by the predicate and at suboptimal speed. - * The idea is to dynamically change the behavior of the system in such a way that if a substantial - * load on C2 is detected we would first do the 0->2 transition allowing a method to run faster. - * And then when the load decreases to allow 2->3 transitions. - * - * Tier3Delay* parameters control this switching mechanism. - * Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy - * no longer does 0->3 transitions but does 0->2 transitions instead. - * Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue - * per compiler thread falls below the specified amount. - * The hysteresis is necessary to avoid jitter. - * - * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue. - * Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to - * compile from the compile queue, we also can detect stale methods for which the rate has been - * 0 for some time in the same iteration. 
Stale methods can appear in the queue when an application - * abruptly changes its behavior. - * - * - TieredStopAtLevel, is used mostly for testing. It allows to bypass the policy logic and stick - * to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything - * with pure c1. - * - * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the - * 0->3 predicate are already exceeded by the given percentage but the level 3 version of the - * method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled - * version in time. This reduces the overall transition to level 4 and decreases the startup time. - * Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long - * these is not reason to start profiling prematurely. - * - * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation. - * Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered - * to be zero if no events occurred in TieredRateUpdateMaxTime. - */ - - -class AdvancedThresholdPolicy : public SimpleThresholdPolicy { - jlong _start_time; - - // Call and loop predicates determine whether a transition to a higher compilation - // level should be performed (pointers to predicate functions are passed to common(). - // Predicates also take compiler load into account. - typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level, Method* method); - bool call_predicate(int i, int b, CompLevel cur_level, Method* method); - bool loop_predicate(int i, int b, CompLevel cur_level, Method* method); - // Common transition function. Given a predicate determines if a method should transition to another level. - CompLevel common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback = false); - // Transition functions. 
- // call_event determines if a method should be compiled at a different - // level with a regular invocation entry. - CompLevel call_event(Method* method, CompLevel cur_level, JavaThread * thread); - // loop_event checks if a method should be OSR compiled at a different - // level. - CompLevel loop_event(Method* method, CompLevel cur_level, JavaThread * thread); - // Has a method been long around? - // We don't remove old methods from the compile queue even if they have - // very low activity (see select_task()). - inline bool is_old(Method* method); - // Was a given method inactive for a given number of milliseconds. - // If it is, we would remove it from the queue (see select_task()). - inline bool is_stale(jlong t, jlong timeout, Method* m); - // Compute the weight of the method for the compilation scheduling - inline double weight(Method* method); - // Apply heuristics and return true if x should be compiled before y - inline bool compare_methods(Method* x, Method* y); - // Compute event rate for a given method. The rate is the number of event (invocations + backedges) - // per millisecond. - inline void update_rate(jlong t, Method* m); - // Compute threshold scaling coefficient - inline double threshold_scale(CompLevel level, int feedback_k); - // If a method is old enough and is still in the interpreter we would want to - // start profiling without waiting for the compiled method to arrive. This function - // determines whether we should do that. - inline bool should_create_mdo(Method* method, CompLevel cur_level); - // Create MDO if necessary. - void create_mdo(const methodHandle& mh, JavaThread* thread); - // Is method profiled enough? 
- bool is_method_profiled(Method* method); - - double _increase_threshold_at_ratio; - - bool maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread); - -protected: - void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level); - - void set_increase_threshold_at_ratio() { _increase_threshold_at_ratio = 100 / (100 - (double)IncreaseFirstTierCompileThresholdAt); } - void set_start_time(jlong t) { _start_time = t; } - jlong start_time() const { return _start_time; } - - // Submit a given method for compilation (and update the rate). - virtual void submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread); - // event() from SimpleThresholdPolicy would call these. - virtual void method_invocation_event(const methodHandle& method, const methodHandle& inlinee, - CompLevel level, CompiledMethod* nm, JavaThread* thread); - virtual void method_back_branch_event(const methodHandle& method, const methodHandle& inlinee, - int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread); -public: - AdvancedThresholdPolicy() : _start_time(0) { } - // Select task is called by CompileBroker. We should return a task or NULL. - virtual CompileTask* select_task(CompileQueue* compile_queue); - virtual void initialize(); - virtual bool should_not_inline(ciEnv* env, ciMethod* callee); - -}; - -#endif // TIERED - -#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -1605,7 +1605,7 @@ } void Arguments::set_tiered_flags() { - // With tiered, set default policy to AdvancedThresholdPolicy, which is 3. + // With tiered, set default policy to SimpleThresholdPolicy, which is 2 or 3. 
if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) { FLAG_SET_DEFAULT(CompilationPolicyChoice, 3); } diff --git a/src/hotspot/share/runtime/compilationPolicy.cpp b/src/hotspot/share/runtime/compilationPolicy.cpp --- a/src/hotspot/share/runtime/compilationPolicy.cpp +++ b/src/hotspot/share/runtime/compilationPolicy.cpp @@ -33,7 +33,6 @@ #include "oops/method.inline.hpp" #include "oops/oop.inline.hpp" #include "prims/nativeLookup.hpp" -#include "runtime/advancedThresholdPolicy.hpp" #include "runtime/compilationPolicy.hpp" #include "runtime/frame.hpp" #include "runtime/handles.inline.hpp" @@ -68,19 +67,13 @@ #endif break; case 2: + case 3: #ifdef TIERED CompilationPolicy::set_policy(new SimpleThresholdPolicy()); #else Unimplemented(); #endif break; - case 3: -#ifdef TIERED - CompilationPolicy::set_policy(new AdvancedThresholdPolicy()); -#else - Unimplemented(); -#endif - break; default: fatal("CompilationPolicyChoice must be in the range: [0-3]"); } diff --git a/src/hotspot/share/runtime/simpleThresholdPolicy.cpp b/src/hotspot/share/runtime/simpleThresholdPolicy.cpp --- a/src/hotspot/share/runtime/simpleThresholdPolicy.cpp +++ b/src/hotspot/share/runtime/simpleThresholdPolicy.cpp @@ -140,20 +140,33 @@ } void SimpleThresholdPolicy::initialize() { - if (FLAG_IS_DEFAULT(CICompilerCount)) { - FLAG_SET_DEFAULT(CICompilerCount, 3); - } int count = CICompilerCount; #ifdef _LP64 - // On 64-bit systems, scale the number of compiler threads with - // the number of cores available on the system. Scaling is not - // performed on 32-bit systems because it can lead to exhaustion - // of the virtual memory address space available to the JVM. 
+ // Turn on ergonomic compiler count selection + if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) { + FLAG_SET_DEFAULT(CICompilerCountPerCPU, true); + } if (CICompilerCountPerCPU) { - count = MAX2(log2_intptr(os::active_processor_count()) * 3 / 2, 2); + // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n + int log_cpu = log2_intptr(os::active_processor_count()); + int loglog_cpu = log2_intptr(MAX2(log_cpu, 1)); + count = MAX2(log_cpu * loglog_cpu * 3 / 2, 2); + FLAG_SET_ERGO(intx, CICompilerCount, count); + } +#else + // On 32-bit systems, the number of compiler threads is limited to 3. + // On these systems, the virtual address space available to the JVM + // is usually limited to 2-4 GB (the exact value depends on the platform). + // As the compilers (especially C2) can consume a large amount of + // memory, scaling the number of compiler threads with the number of + // available cores can result in the exhaustion of the address space + // available to the VM and thus cause the VM to crash. 
+ if (FLAG_IS_DEFAULT(CICompilerCount)) { + count = 3; FLAG_SET_ERGO(intx, CICompilerCount, count); } #endif + if (TieredStopAtLevel < CompLevel_full_optimization) { // No C2 compiler thread required set_c1_count(count); @@ -162,6 +175,22 @@ set_c2_count(MAX2(count - c1_count(), 1)); } assert(count == c1_count() + c2_count(), "inconsistent compiler thread count"); + + // Some inlining tuning +#ifdef X86 + if (FLAG_IS_DEFAULT(InlineSmallCode)) { + FLAG_SET_DEFAULT(InlineSmallCode, 2000); + } +#endif + +#if defined SPARC || defined AARCH64 + if (FLAG_IS_DEFAULT(InlineSmallCode)) { + FLAG_SET_DEFAULT(InlineSmallCode, 2500); + } +#endif + + set_increase_threshold_at_ratio(); + set_start_time(os::javaTimeMillis()); } void SimpleThresholdPolicy::set_carry_if_necessary(InvocationCounter *counter) { @@ -186,7 +215,66 @@ // Called with the queue locked and with at least one element CompileTask* SimpleThresholdPolicy::select_task(CompileQueue* compile_queue) { - return select_task_helper(compile_queue); + CompileTask *max_blocking_task = NULL; + CompileTask *max_task = NULL; + Method* max_method = NULL; + jlong t = os::javaTimeMillis(); + // Iterate through the queue and find a method with a maximum rate. + for (CompileTask* task = compile_queue->first(); task != NULL;) { + CompileTask* next_task = task->next(); + Method* method = task->method(); + update_rate(t, method); + if (max_task == NULL) { + max_task = task; + max_method = method; + } else { + // If a method has been stale for some time, remove it from the queue. 
+ // Blocking tasks and tasks submitted from whitebox API don't become stale + if (task->can_become_stale() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { + if (PrintTieredEvents) { + print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level()); + } + compile_queue->remove_and_mark_stale(task); + method->clear_queued_for_compilation(); + task = next_task; + continue; + } + + // Select a method with a higher rate + if (compare_methods(method, max_method)) { + max_task = task; + max_method = method; + } + } + + if (task->is_blocking()) { + if (max_blocking_task == NULL || compare_methods(method, max_blocking_task->method())) { + max_blocking_task = task; + } + } + + task = next_task; + } + + if (max_blocking_task != NULL) { + // In blocking compilation mode, the CompileBroker will make + // compilations submitted by a JVMCI compiler thread non-blocking. These + // compilations should be scheduled after all blocking compilations + // to service non-compiler related compilations sooner and reduce the + // chance of such compilations timing out. + max_task = max_blocking_task; + max_method = max_task->method(); + } + + if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile + && is_method_profiled(max_method)) { + max_task->set_comp_level(CompLevel_limited_profile); + if (PrintTieredEvents) { + print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level()); + } + } + + return max_task; } void SimpleThresholdPolicy::reprofile(ScopeDesc* trap_scope, bool is_osr) { @@ -284,26 +372,150 @@ } } -// Tell the broker to compile the method +// Update the rate and submit compile void SimpleThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) { int hot_count = (bci == InvocationEntryBci) ? 
mh->invocation_count() : mh->backedge_count(); + update_rate(os::javaTimeMillis(), mh()); CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread); } +// Print an event. +void SimpleThresholdPolicy::print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, + int bci, CompLevel level) { + tty->print(" rate="); + if (mh->prev_time() == 0) tty->print("n/a"); + else tty->print("%f", mh->rate()); + + tty->print(" k=%.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback), + threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback)); + +} + +// update_rate() is called from select_task() while holding a compile queue lock. +void SimpleThresholdPolicy::update_rate(jlong t, Method* m) { + // Skip update if counters are absent. + // Can't allocate them since we are holding compile queue lock. + if (m->method_counters() == NULL) return; + + if (is_old(m)) { + // We don't remove old methods from the queue, + // so we can just zero the rate. + m->set_rate(0); + return; + } + + // We don't update the rate if we've just come out of a safepoint. + // delta_s is the time since last safepoint in milliseconds. + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); + jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement + // How many events were there since the last time? + int event_count = m->invocation_count() + m->backedge_count(); + int delta_e = event_count - m->prev_event_count(); + + // We should be running for at least 1ms. + if (delta_s >= TieredRateUpdateMinTime) { + // And we must've taken the previous point at least 1ms before. 
+ if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) { + m->set_prev_time(t); + m->set_prev_event_count(event_count); + m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond + } else { + if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) { + // If nothing happened for 25ms, zero the rate. Don't modify prev values. + m->set_rate(0); + } + } + } +} + +// Check if this method has been stale for a given number of milliseconds. +// See select_task(). +bool SimpleThresholdPolicy::is_stale(jlong t, jlong timeout, Method* m) { + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); + jlong delta_t = t - m->prev_time(); + if (delta_t > timeout && delta_s > timeout) { + int event_count = m->invocation_count() + m->backedge_count(); + int delta_e = event_count - m->prev_event_count(); + // Return true if there were no events. + return delta_e == 0; + } + return false; +} + +// We don't remove old methods from the compile queue even if they have +// very low activity. See select_task(). +bool SimpleThresholdPolicy::is_old(Method* method) { + return method->invocation_count() > 50000 || method->backedge_count() > 500000; +} + +double SimpleThresholdPolicy::weight(Method* method) { + return (double)(method->rate() + 1) * + (method->invocation_count() + 1) * (method->backedge_count() + 1); +} + +// Apply heuristics and return true if x should be compiled before y +bool SimpleThresholdPolicy::compare_methods(Method* x, Method* y) { + if (x->highest_comp_level() > y->highest_comp_level()) { + // recompilation after deopt + return true; + } else + if (x->highest_comp_level() == y->highest_comp_level()) { + if (weight(x) > weight(y)) { + return true; + } + } + return false; +} + +// Is method profiled enough? 
+bool SimpleThresholdPolicy::is_method_profiled(Method* method) { + MethodData* mdo = method->method_data(); + if (mdo != NULL) { + int i = mdo->invocation_count_delta(); + int b = mdo->backedge_count_delta(); + return call_predicate_helper(i, b, 1, method); + } + return false; +} + +double SimpleThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) { + double queue_size = CompileBroker::queue_size(level); + int comp_count = compiler_count(level); + double k = queue_size / (feedback_k * comp_count) + 1; + + // Increase C1 compile threshold when the code cache is filled more + // than specified by IncreaseFirstTierCompileThresholdAt percentage. + // The main intention is to keep enough free space for C2 compiled code + // to achieve peak performance if the code cache is under stress. + if ((TieredStopAtLevel == CompLevel_full_optimization) && (level != CompLevel_full_optimization)) { + double current_reverse_free_ratio = CodeCache::reverse_free_ratio(CodeCache::get_code_blob_type(level)); + if (current_reverse_free_ratio > _increase_threshold_at_ratio) { + k *= exp(current_reverse_free_ratio - _increase_threshold_at_ratio); + } + } + return k; +} + // Call and loop predicates determine whether a transition to a higher // compilation level should be performed (pointers to predicate functions -// are passed to common() transition function). +// are passed to common()). +// Tier?LoadFeedback is basically a coefficient that determines +// how many methods per compiler thread can be in the queue before +// the threshold values double. 
bool SimpleThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level, Method* method) { switch(cur_level) { case CompLevel_aot: { - return loop_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); + return loop_predicate_helper(i, b, k, method); } case CompLevel_none: case CompLevel_limited_profile: { - return loop_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); + return loop_predicate_helper(i, b, k, method); } case CompLevel_full_profile: { - return loop_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); + return loop_predicate_helper(i, b, k, method); } default: return true; @@ -313,14 +525,17 @@ bool SimpleThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level, Method* method) { switch(cur_level) { case CompLevel_aot: { - return call_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); + return call_predicate_helper(i, b, k, method); } case CompLevel_none: case CompLevel_limited_profile: { - return call_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); + return call_predicate_helper(i, b, k, method); } case CompLevel_full_profile: { - return call_predicate_helper(i, b, 1.0, method); + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); + return call_predicate_helper(i, b, k, method); } default: return true; @@ -341,31 +556,167 @@ return false; } +// If a method is old enough and is still in the interpreter we would want to +// start profiling without waiting for the compiled method to arrive. +// We also take the load on compilers into account. 
+bool SimpleThresholdPolicy::should_create_mdo(Method* method, CompLevel cur_level) { + if (cur_level == CompLevel_none && + CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { + int i = method->invocation_count(); + int b = method->backedge_count(); + double k = Tier0ProfilingStartPercentage / 100.0; + return call_predicate_helper(i, b, k, method) || loop_predicate_helper(i, b, k, method); + } + return false; +} + +// Inlining control: if we're compiling a profiled method with C1 and the callee +// is known to have OSRed in a C2 version, don't inline it. +bool SimpleThresholdPolicy::should_not_inline(ciEnv* env, ciMethod* callee) { + CompLevel comp_level = (CompLevel)env->comp_level(); + if (comp_level == CompLevel_full_profile || + comp_level == CompLevel_limited_profile) { + return callee->highest_osr_comp_level() == CompLevel_full_optimization; + } + return false; +} + +// Create MDO if necessary. +void SimpleThresholdPolicy::create_mdo(const methodHandle& mh, JavaThread* THREAD) { + if (mh->is_native() || + mh->is_abstract() || + mh->is_accessor() || + mh->is_constant_getter()) { + return; + } + if (mh->method_data() == NULL) { + Method::build_interpreter_method_data(mh, CHECK_AND_CLEAR); + } +} + + +/* + * Method states: + * 0 - interpreter (CompLevel_none) + * 1 - pure C1 (CompLevel_simple) + * 2 - C1 with invocation and backedge counting (CompLevel_limited_profile) + * 3 - C1 with full profiling (CompLevel_full_profile) + * 4 - C2 (CompLevel_full_optimization) + * + * Common state transition patterns: + * a. 0 -> 3 -> 4. + * The most common path. But note that even in this straightforward case + * profiling can start at level 0 and finish at level 3. + * + * b. 0 -> 2 -> 3 -> 4. + * This case occurs when the load on C2 is deemed too high. 
So, instead of transitioning + * into state 3 directly and over-profiling while a method is in the C2 queue we transition to + * level 2 and wait until the load on C2 decreases. This path is disabled for OSRs. + * + * c. 0 -> (3->2) -> 4. + * In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough + * to enable the profiling to fully occur at level 0. In this case we change the compilation level + * of the method to 2 while the request is still in-queue, because it'll allow it to run much faster + * without full profiling while c2 is compiling. + * + * d. 0 -> 3 -> 1 or 0 -> 2 -> 1. + * After a method was once compiled with C1 it can be identified as trivial and be compiled to + * level 1. These transitions can also occur if a method can't be compiled with C2 but can with C1. + * + * e. 0 -> 4. + * This can happen if a method fails C1 compilation (it will still be profiled in the interpreter) + * or because of a deopt that didn't require reprofiling (compilation won't happen in this case because + * the compiled version already exists). + * + * Note that since state 0 can be reached from any other state via deoptimization different loops + * are possible. + * + */ + // Common transition function. Given a predicate determines if a method should transition to another level. -CompLevel SimpleThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level) { +CompLevel SimpleThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback) { CompLevel next_level = cur_level; int i = method->invocation_count(); int b = method->backedge_count(); - if (is_trivial(method) && cur_level != CompLevel_aot) { + if (is_trivial(method)) { next_level = CompLevel_simple; } else { switch(cur_level) { - case CompLevel_aot: { - if ((this->*p)(i, b, cur_level, method)) { + default: break; + case CompLevel_aot: { + // If we were at full profile level, would we switch to full opt? 
+ if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) { + next_level = CompLevel_full_optimization; + } else if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOff * compiler_count(CompLevel_full_optimization) && + (this->*p)(i, b, cur_level, method))) { next_level = CompLevel_full_profile; } } break; case CompLevel_none: // If we were at full profile level, would we switch to full opt? - if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) { + if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) { next_level = CompLevel_full_optimization; } else if ((this->*p)(i, b, cur_level, method)) { - next_level = CompLevel_full_profile; +#if INCLUDE_JVMCI + if (EnableJVMCI && UseJVMCICompiler) { + // Since JVMCI takes a while to warm up, its queue inevitably backs up during + // early VM execution. As of 2014-06-13, JVMCI's inliner assumes that the root + // compilation method and all potential inlinees have mature profiles (which + // includes type profiling). If it sees immature profiles, JVMCI's inliner + // can perform pathologically bad (e.g., causing OutOfMemoryErrors due to + // exploring/inlining too many graphs). Since a rewrite of the inliner is + // in progress, we simply disable the dialing back heuristic for now and will + // revisit this decision once the new inliner is completed. + next_level = CompLevel_full_profile; + } else +#endif + { + // C1-generated fully profiled code is about 30% slower than the limited profile + // code that has only invocation and backedge counters. The observation is that + // if C2 queue is large enough we can spend too much time in the fully profiled code + // while waiting for C2 to pick the method from the queue. To alleviate this problem + // we introduce a feedback on the C2 queue size. 
If the C2 queue is sufficiently long + // we choose to compile a limited profiled version and then recompile with full profiling + // when the load on C2 goes down. + if (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) > + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { + next_level = CompLevel_limited_profile; + } else { + next_level = CompLevel_full_profile; + } + } } break; case CompLevel_limited_profile: + if (is_method_profiled(method)) { + // Special case: we got here because this method was fully profiled in the interpreter. + next_level = CompLevel_full_optimization; + } else { + MethodData* mdo = method->method_data(); + if (mdo != NULL) { + if (mdo->would_profile()) { + if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOff * compiler_count(CompLevel_full_optimization) && + (this->*p)(i, b, cur_level, method))) { + next_level = CompLevel_full_profile; + } + } else { + next_level = CompLevel_full_optimization; + } + } else { + // If there is no MDO we need to profile + if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOff * compiler_count(CompLevel_full_optimization) && + (this->*p)(i, b, cur_level, method))) { + next_level = CompLevel_full_profile; + } + } + } + break; case CompLevel_full_profile: { MethodData* mdo = method->method_data(); @@ -382,17 +733,15 @@ } } break; - default: - break; } } return MIN2(next_level, (CompLevel)TieredStopAtLevel); } // Determine if a method should be compiled with a normal entry point at a different level. 
-CompLevel SimpleThresholdPolicy::call_event(Method* method, CompLevel cur_level, JavaThread* thread) { +CompLevel SimpleThresholdPolicy::call_event(Method* method, CompLevel cur_level, JavaThread * thread) { CompLevel osr_level = MIN2((CompLevel) method->highest_osr_comp_level(), - common(&SimpleThresholdPolicy::loop_predicate, method, cur_level)); + common(&SimpleThresholdPolicy::loop_predicate, method, cur_level, true)); CompLevel next_level = common(&SimpleThresholdPolicy::call_predicate, method, cur_level); // If OSR method level is greater than the regular method level, the levels should be @@ -417,7 +766,7 @@ // Determine if we should do an OSR compilation of a given method. CompLevel SimpleThresholdPolicy::loop_event(Method* method, CompLevel cur_level, JavaThread* thread) { - CompLevel next_level = common(&SimpleThresholdPolicy::loop_predicate, method, cur_level); + CompLevel next_level = common(&SimpleThresholdPolicy::loop_predicate, method, cur_level, true); if (cur_level == CompLevel_none) { // If there is a live OSR method that means that we deopted to the interpreter // for the transition. @@ -434,13 +783,39 @@ return next_level; } +bool SimpleThresholdPolicy::maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread) { + if (UseAOT && !delay_compilation_during_startup()) { + if (cur_level == CompLevel_full_profile || cur_level == CompLevel_none) { + // If the current level is full profile or interpreter and we're switching to any other level, + // activate the AOT code back first so that we won't waste time overprofiling. + compile(mh, InvocationEntryBci, CompLevel_aot, thread); + // Fall through for JIT compilation. + } + if (next_level == CompLevel_limited_profile && cur_level != CompLevel_aot && mh->has_aot_code()) { + // If the next level is limited profile, use the aot code (if there is any), + // since it's essentially the same thing. 
+ compile(mh, InvocationEntryBci, CompLevel_aot, thread); + // No need to JIT, we're done. + return true; + } + } + return false; +} + // Handle the invocation event. void SimpleThresholdPolicy::method_invocation_event(const methodHandle& mh, const methodHandle& imh, - CompLevel level, CompiledMethod* nm, JavaThread* thread) { - if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) { - CompLevel next_level = call_event(mh(), level, thread); - if (next_level != level) { + CompLevel level, CompiledMethod* nm, JavaThread* thread) { + if (should_create_mdo(mh(), level)) { + create_mdo(mh, thread); + } + CompLevel next_level = call_event(mh(), level, thread); + if (next_level != level) { + if (maybe_switch_to_aot(mh, level, next_level, thread)) { + // No JITting necessary + return; + } + if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) { compile(mh, InvocationEntryBci, next_level, thread); } } @@ -450,25 +825,77 @@ // with a regular entry from here. void SimpleThresholdPolicy::method_back_branch_event(const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread) { - // If the method is already compiling, quickly bail out. - if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) { - // Use loop event as an opportunity to also check there's been - // enough calls. - CompLevel cur_level = comp_level(mh()); - CompLevel next_level = call_event(mh(), cur_level, thread); - CompLevel next_osr_level = loop_event(mh(), level, thread); + if (should_create_mdo(mh(), level)) { + create_mdo(mh, thread); + } + // Check if MDO should be created for the inlined method + if (should_create_mdo(imh(), level)) { + create_mdo(imh, thread); + } - next_level = MAX2(next_level, - next_osr_level < CompLevel_full_optimization ? 
next_osr_level : cur_level); - bool is_compiling = false; - if (next_level != cur_level) { - compile(mh, InvocationEntryBci, next_level, thread); - is_compiling = true; + if (is_compilation_enabled()) { + CompLevel next_osr_level = loop_event(imh(), level, thread); + CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level(); + // At the very least compile the OSR version + if (!CompileBroker::compilation_is_in_queue(imh) && (next_osr_level != level)) { + compile(imh, bci, next_osr_level, thread); } - // Do the OSR version - if (!is_compiling && next_osr_level != level) { - compile(mh, bci, next_osr_level, thread); + // Use loop event as an opportunity to also check if there's been + // enough calls. + CompLevel cur_level, next_level; + if (mh() != imh()) { // If there is an enclosing method + if (level == CompLevel_aot) { + // Recompile the enclosing method to prevent infinite OSRs. Stay at AOT level while it's compiling. + if (max_osr_level != CompLevel_none && !CompileBroker::compilation_is_in_queue(mh)) { + compile(mh, InvocationEntryBci, MIN2((CompLevel)TieredStopAtLevel, CompLevel_full_profile), thread); + } + } else { + // Current loop event level is not AOT + guarantee(nm != NULL, "Should have nmethod here"); + cur_level = comp_level(mh()); + next_level = call_event(mh(), cur_level, thread); + + if (max_osr_level == CompLevel_full_optimization) { + // The inlinee OSRed to full opt, we need to modify the enclosing method to avoid deopts + bool make_not_entrant = false; + if (nm->is_osr_method()) { + // This is an osr method, just make it not entrant and recompile later if needed + make_not_entrant = true; + } else { + if (next_level != CompLevel_full_optimization) { + // next_level is not full opt, so we need to recompile the + // enclosing method without the inlinee + cur_level = CompLevel_none; + make_not_entrant = true; + } + } + if (make_not_entrant) { + if (PrintTieredEvents) { + int osr_bci = nm->is_osr_method() ? 
nm->osr_entry_bci() : InvocationEntryBci; + print_event(MAKE_NOT_ENTRANT, mh(), mh(), osr_bci, level); + } + nm->make_not_entrant(); + } + } + // Fix up next_level if necessary to avoid deopts + if (next_level == CompLevel_limited_profile && max_osr_level == CompLevel_full_profile) { + next_level = CompLevel_full_profile; + } + if (cur_level != next_level) { + if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) { + compile(mh, InvocationEntryBci, next_level, thread); + } + } + } + } else { + cur_level = comp_level(mh()); + next_level = call_event(mh(), cur_level, thread); + if (next_level != cur_level) { + if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) { + compile(mh, InvocationEntryBci, next_level, thread); + } + } } } } diff --git a/src/hotspot/share/runtime/simpleThresholdPolicy.hpp b/src/hotspot/share/runtime/simpleThresholdPolicy.hpp --- a/src/hotspot/share/runtime/simpleThresholdPolicy.hpp +++ b/src/hotspot/share/runtime/simpleThresholdPolicy.hpp @@ -34,8 +34,136 @@ class CompileTask; class CompileQueue; +/* + * The system supports 5 execution levels: + * * level 0 - interpreter + * * level 1 - C1 with full optimization (no profiling) + * * level 2 - C1 with invocation and backedge counters + * * level 3 - C1 with full profiling (level 2 + MDO) + * * level 4 - C2 + * + * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters + * (invocation counters and backedge counters). The frequency of these notifications is + * different at each level. These notifications are used by the policy to decide what transition + * to make. + * + * Execution starts at level 0 (interpreter), then the policy can decide either to compile the + * method at level 3 or level 2. The decision is based on the following factors: + * 1. The length of the C2 queue determines the next level. 
The observation is that level 2 + * is generally faster than level 3 by about 30%, therefore we would want to minimize the time + * a method spends at level 3. We should only spend the time at level 3 that is necessary to get + * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to + * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile + * request makes its way through the long queue. When the load on C2 recedes we are going to + * recompile at level 3 and start gathering profiling information. + * 2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce + * additional filtering if the compiler is overloaded. The rationale is that by the time a + * method gets compiled it can become unused, so it doesn't make sense to put too much onto the + * queue. + * + * After profiling is completed at level 3 the transition is made to level 4. Again, the length + * of the C2 queue is used as a feedback to adjust the thresholds. + * + * After the first C1 compile some basic information is determined about the code like the number + * of the blocks and the number of the loops. Based on that it can be decided that a method + * is trivial and compiling it with C1 will yield the same code. In this case the method is + * compiled at level 1 instead of 4. + * + * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of + * the code and the C2 queue is sufficiently small we can decide to start profiling in the + * interpreter (and continue profiling in the compiled code once the level 3 version arrives). + * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2 + * version is compiled instead in order to run faster waiting for a level 4 version. 
+ * + * Compile queues are implemented as priority queues - for each method in the queue we compute + * the event rate (the number of invocation and backedge counter increments per unit of time). + * When getting an element off the queue we pick the one with the largest rate. Maintaining the + * rate also allows us to remove stale methods (the ones that got on the queue but stopped + * being used shortly after that). +*/ + +/* Command line options: + * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method + * invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread + * makes a call into the runtime. + * + * - Tier?InvocationThreshold, Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control + * compilation thresholds. + * Level 2 thresholds are not used and are provided for option-compatibility and potential future use. + * Other thresholds work as follows: + * + * Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when + * the following predicate is true (X is the level): + * + * i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s && i + b > TierXCompileThreshold * s), + * + * where $i$ is the number of method invocations, $b$ is the number of backedges and $s$ is the scaling + * coefficient that will be discussed further. + * The intuition is to equalize the time that is spent profiling each method. + * The same predicate is used to control the transition from level 3 to level 4 (C2). It should be + * noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come + * from Method* and for 3->4 transition they come from MDO (since profiled invocations are + * counted separately).
Finally, if a method does not contain anything worth profiling, a transition + * from level 3 to level 4 occurs without considering thresholds (e.g., with fewer invocations than + * what is specified by Tier4InvocationThreshold). + * + * OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates. + * + * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending + * on the compiler load. The scaling coefficients are computed as follows: + * + * s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1, + * + * where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X + * is the number of level X compiler threads. + * + * Basically these parameters describe how many methods should be in the compile queue + * per compiler thread before the scaling coefficient increases by one. + * + * This feedback provides the mechanism to automatically control the flow of compilation requests + * depending on the machine speed, mutator load and other external factors. + * + * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop. + * Consider the following observation: a method compiled with full profiling (level 3) + * is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO). + * Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue + * gets congested and the 3->4 transition is delayed. While the method is in the C2 queue it continues + * executing at level 3 for a much longer time than is required by the predicate and at suboptimal speed. + * The idea is to dynamically change the behavior of the system in such a way that if a substantial + * load on C2 is detected we would first do the 0->2 transition allowing a method to run faster. + * And then, when the load decreases, we would allow 2->3 transitions. + * + * Tier3Delay* parameters control this switching mechanism.
+ * Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy + * no longer does 0->3 transitions but does 0->2 transitions instead. + * Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue + * per compiler thread falls below the specified amount. + * The hysteresis is necessary to avoid jitter. + * + * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue. + * Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to + * compile from the compile queue, we also can detect stale methods for which the rate has been + * 0 for some time in the same iteration. Stale methods can appear in the queue when an application + * abruptly changes its behavior. + * + * - TieredStopAtLevel is used mostly for testing. It allows bypassing the policy logic and sticking + * to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything + * with pure c1. + * + * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the + * 0->3 predicate are already exceeded by the given percentage but the level 3 version of the + * method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled + * version in time. This reduces the overall transition time to level 4 and decreases the startup time. + * Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long + * there is no reason to start profiling prematurely. + * + * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation. + * Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered + * to be zero if no events occurred in TieredRateUpdateMaxTime.
+ */ class SimpleThresholdPolicy : public CompilationPolicy { + jlong _start_time; int _c1_count, _c2_count; // Check if the counter is big enough and set carry (effectively infinity). @@ -49,7 +177,7 @@ bool call_predicate(int i, int b, CompLevel cur_level, Method* method); bool loop_predicate(int i, int b, CompLevel cur_level, Method* method); // Common transition function. Given a predicate determines if a method should transition to another level. - CompLevel common(Predicate p, Method* method, CompLevel cur_level); + CompLevel common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback = false); // Transition functions. // call_event determines if a method should be compiled at a different // level with a regular invocation entry. @@ -58,6 +186,35 @@ // level. CompLevel loop_event(Method* method, CompLevel cur_level, JavaThread* thread); void print_counters(const char* prefix, const methodHandle& mh); + // Has a method been long around? + // We don't remove old methods from the compile queue even if they have + // very low activity (see select_task()). + inline bool is_old(Method* method); + // Was a given method inactive for a given number of milliseconds. + // If it is, we would remove it from the queue (see select_task()). + inline bool is_stale(jlong t, jlong timeout, Method* m); + // Compute the weight of the method for the compilation scheduling + inline double weight(Method* method); + // Apply heuristics and return true if x should be compiled before y + inline bool compare_methods(Method* x, Method* y); + // Compute event rate for a given method. The rate is the number of event (invocations + backedges) + // per millisecond. + inline void update_rate(jlong t, Method* m); + // Compute threshold scaling coefficient + inline double threshold_scale(CompLevel level, int feedback_k); + // If a method is old enough and is still in the interpreter we would want to + // start profiling without waiting for the compiled method to arrive. 
This function + // determines whether we should do that. + inline bool should_create_mdo(Method* method, CompLevel cur_level); + // Create MDO if necessary. + void create_mdo(const methodHandle& mh, JavaThread* thread); + // Is method profiled enough? + bool is_method_profiled(Method* method); + + double _increase_threshold_at_ratio; + + bool maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread); + protected: int c1_count() const { return _c1_count; } int c2_count() const { return _c2_count; } @@ -67,7 +224,7 @@ enum EventType { CALL, LOOP, COMPILE, REMOVE_FROM_QUEUE, UPDATE_IN_QUEUE, REPROFILE, MAKE_NOT_ENTRANT }; void print_event(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level); // Print policy-specific information if necessary - virtual void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level) { } + virtual void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level); // Check if the method can be compiled, change level if necessary void compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread); // Submit a given method for compilation @@ -87,8 +244,13 @@ CompLevel level, CompiledMethod* nm, JavaThread* thread); virtual void method_back_branch_event(const methodHandle& method, const methodHandle& inlinee, int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread); + + void set_increase_threshold_at_ratio() { _increase_threshold_at_ratio = 100 / (100 - (double)IncreaseFirstTierCompileThresholdAt); } + void set_start_time(jlong t) { _start_time = t; } + jlong start_time() const { return _start_time; } + public: - SimpleThresholdPolicy() : _c1_count(0), _c2_count(0) { } + SimpleThresholdPolicy() : _start_time(0), _c1_count(0), _c2_count(0) { } virtual int compiler_count(CompLevel comp_level) { if (is_c1_compile(comp_level)) return c1_count(); 
if (is_c2_compile(comp_level)) return c2_count(); @@ -107,11 +269,7 @@ virtual bool is_mature(Method* method); // Initialize: set compiler thread count virtual void initialize(); - virtual bool should_not_inline(ciEnv* env, ciMethod* callee) { - return (env->comp_level() == CompLevel_limited_profile || - env->comp_level() == CompLevel_full_profile) && - callee->has_loops(); - } + virtual bool should_not_inline(ciEnv* env, ciMethod* callee); }; #endif // TIERED diff --git a/test/hotspot/jtreg/compiler/aot/RecompilationTest.java b/test/hotspot/jtreg/compiler/aot/RecompilationTest.java --- a/test/hotspot/jtreg/compiler/aot/RecompilationTest.java +++ b/test/hotspot/jtreg/compiler/aot/RecompilationTest.java @@ -37,26 +37,12 @@ * -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:. * -extraopt -XX:-UseCompressedOops * -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::* - * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:CompilationPolicyChoice=2 - * -XX:-UseCounterDecay -XX:-UseCompressedOops - * -XX:-Inline - * -XX:AOTLibrary=./libRecompilationTest1.so -Xbootclasspath/a:. - * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -Dcompiler.aot.RecompilationTest.check_level=1 - * compiler.aot.RecompilationTest * @run driver compiler.aot.AotCompiler -libname libRecompilationTest2.so * -class compiler.whitebox.SimpleTestCaseHelper * -extraopt -Dgraal.TieredAOT=false * -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:. * -extraopt -XX:-UseCompressedOops * -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::* - * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:CompilationPolicyChoice=2 - * -XX:-UseCounterDecay -XX:-UseCompressedOops - * -XX:-Inline - * -XX:AOTLibrary=./libRecompilationTest2.so -Xbootclasspath/a:. 
- * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -Dcompiler.aot.RecompilationTest.check_level=-1 - * compiler.aot.RecompilationTest * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:-TieredCompilation * -XX:-UseCounterDecay -XX:-UseCompressedOops * -XX:-Inline diff --git a/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java b/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java --- a/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java +++ b/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java @@ -34,7 +34,6 @@ * @run main/othervm/timeout=240 -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI -XX:+TieredCompilation -XX:-UseCounterDecay * -XX:CompileCommand=compileonly,compiler.tiered.ConstantGettersTransitionsTest$ConstantGettersTestCase$TrivialMethods::* - * compiler.tiered.TransitionsTestExecutor * compiler.tiered.ConstantGettersTransitionsTest */ @@ -200,4 +199,4 @@ } } } -} \ No newline at end of file +} diff --git a/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java b/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java --- a/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java +++ b/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java @@ -36,7 +36,6 @@ * -XX:+WhiteBoxAPI -XX:+TieredCompilation -XX:-UseCounterDecay * -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCaseHelper::* * -XX:CompileCommand=compileonly,compiler.tiered.LevelTransitionTest$ExtendedTestCase$CompileMethodHolder::* - * compiler.tiered.TransitionsTestExecutor * compiler.tiered.LevelTransitionTest */ diff --git a/test/hotspot/jtreg/compiler/tiered/TransitionsTestExecutor.java b/test/hotspot/jtreg/compiler/tiered/TransitionsTestExecutor.java deleted file mode 100644 --- a/test/hotspot/jtreg/compiler/tiered/TransitionsTestExecutor.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. 
All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -package compiler.tiered; - -import compiler.whitebox.CompilerWhiteBoxTest; -import jdk.test.lib.process.OutputAnalyzer; -import jdk.test.lib.process.ProcessTools; - -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Executes given test in a separate VM with enabled Tiered Compilation for - * CompilationPolicyChoice 2 and 3 - */ -public class TransitionsTestExecutor { - public static void main(String[] args) throws Throwable { - if (CompilerWhiteBoxTest.skipOnTieredCompilation(false)) { - return; - } - if (args.length != 1) { - throw new Error("TESTBUG: Test name should be specified"); - } - executeTestFor(2, args[0]); - executeTestFor(3, args[0]); - } - - private static void executeTestFor(int compilationPolicy, String testName) throws Throwable { - String policy = "-XX:CompilationPolicyChoice=" + compilationPolicy; - - // Get runtime arguments including VM options given to this executor - RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean(); - List vmArgs = runtime.getInputArguments(); - - // Construct execution command with compilation policy choice and test name - List args = new ArrayList<>(vmArgs); - Collections.addAll(args, policy, testName); - - OutputAnalyzer out = ProcessTools.executeTestJvm(args.toArray(new String[args.size()])); - out.shouldHaveExitValue(0); - } -}