--- old/src/share/vm/oops/methodKlass.cpp 2011-03-03 14:50:39.309821000 -0800 +++ new/src/share/vm/oops/methodKlass.cpp 2011-03-03 14:50:39.086277000 -0800 @@ -103,6 +103,12 @@ m->backedge_counter()->init(); m->clear_number_of_breakpoints(); +#ifdef TIERED + m->set_rate(0); + m->set_prev_event_count(0); + m->set_prev_time(0); +#endif + assert(m->is_parsable(), "must be parsable here."); assert(m->size() == size, "wrong size for object"); // We should not publish an uprasable object's reference --- old/src/share/vm/oops/methodOop.hpp 2011-03-03 14:50:40.035139000 -0800 +++ new/src/share/vm/oops/methodOop.hpp 2011-03-03 14:50:39.873361000 -0800 @@ -84,6 +84,11 @@ // | invocation_counter | // | backedge_counter | // |------------------------------------------------------| +// | prev_time (tiered only, 64 bit wide) | +// | | +// |------------------------------------------------------| +// | rate (tiered) | +// |------------------------------------------------------| // | code (pointer) | // | i2i (pointer) | // | adapter (pointer) | @@ -124,6 +129,11 @@ InvocationCounter _invocation_counter; // Incremented before each activation of the method - used to trigger frequency-based optimizations InvocationCounter _backedge_counter; // Incremented before each backedge taken - used to trigger frequencey-based optimizations +#ifdef TIERED + jlong _prev_time; // Previous time the rate was acquired + float _rate; // Events (invocation and backedge counter increments) per millisecond +#endif + #ifndef PRODUCT int _compiled_invocation_count; // Number of nmethod invocations so far (for perf. debugging) #endif @@ -304,6 +314,17 @@ InvocationCounter* invocation_counter() { return &_invocation_counter; } InvocationCounter* backedge_counter() { return &_backedge_counter; } +#ifdef TIERED + // We are reusing interpreter_invocation_count as a holder for the previous event count! + // We can do that since interpreter_invocation_count is not used in tiered. 
+ int prev_event_count() const { return _interpreter_invocation_count; } + void set_prev_event_count(int count) { _interpreter_invocation_count = count; } + jlong prev_time() const { return _prev_time; } + void set_prev_time(jlong time) { _prev_time = time; } + float rate() const { return _rate; } + void set_rate(float rate) { _rate = rate; } +#endif + int invocation_count(); int backedge_count(); --- old/src/share/vm/runtime/arguments.cpp 2011-03-03 14:50:40.763754000 -0800 +++ new/src/share/vm/runtime/arguments.cpp 2011-03-03 14:50:40.594922000 -0800 @@ -1025,8 +1025,9 @@ } void Arguments::set_tiered_flags() { + // With tiered, set default policy to AdvancedThresholdPolicy, which is 3. if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) { - FLAG_SET_DEFAULT(CompilationPolicyChoice, 2); + FLAG_SET_DEFAULT(CompilationPolicyChoice, 3); } if (CompilationPolicyChoice < 2) { vm_exit_during_initialization( --- old/src/share/vm/runtime/compilationPolicy.cpp 2011-03-03 14:50:41.586499000 -0800 +++ new/src/share/vm/runtime/compilationPolicy.cpp 2011-03-03 14:50:41.425503000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -32,6 +32,7 @@ #include "oops/methodOop.hpp" #include "oops/oop.inline.hpp" #include "prims/nativeLookup.hpp" +#include "runtime/advancedThresholdPolicy.hpp" #include "runtime/compilationPolicy.hpp" #include "runtime/frame.hpp" #include "runtime/handles.inline.hpp" @@ -72,8 +73,15 @@ Unimplemented(); #endif break; + case 3: +#ifdef TIERED + CompilationPolicy::set_policy(new AdvancedThresholdPolicy()); +#else + Unimplemented(); +#endif + break; default: - fatal("CompilationPolicyChoice must be in the range: [0-2]"); + fatal("CompilationPolicyChoice must be in the range: [0-3]"); } CompilationPolicy::policy()->initialize(); } --- /dev/null 2011-03-03 14:50:42.000000000 -0800 +++ new/src/share/vm/runtime/advancedThresholdPolicy.cpp 2011-03-03 14:50:42.135369000 -0800 @@ -0,0 +1,450 @@ +/* +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. +*/ + +#include "precompiled.hpp" +#include "runtime/advancedThresholdPolicy.hpp" +#include "runtime/simpleThresholdPolicy.inline.hpp" + +#ifdef TIERED +// Print an event. 
+void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, methodHandle imh, + int bci, CompLevel level) { + tty->print(" rate: "); + if (mh->prev_time() == 0) tty->print("n/a"); + else tty->print("%f", mh->rate()); + + tty->print(" k: %.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback), + threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback)); + +} + +void AdvancedThresholdPolicy::initialize() { + // Turn on ergonomic compiler count selection unless either flag was set explicitly + if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) { + FLAG_SET_DEFAULT(CICompilerCountPerCPU, true); + } + int count = CICompilerCount; + if (CICompilerCountPerCPU) { + // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n + int log_cpu = log2_intptr(os::active_processor_count()); + int loglog_cpu = log2_intptr(MAX2(log_cpu, 1)); + count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2; + } + + set_c1_count(MAX2(count / 3, 1)); + set_c2_count(MAX2(count - count / 3, 1)); + + // Some inlining tuning (only applied when InlineSmallCode was left at its default) +#ifdef X86 + if (FLAG_IS_DEFAULT(InlineSmallCode)) { + FLAG_SET_DEFAULT(InlineSmallCode, 2000); + } +#endif + +#ifdef SPARC + if (FLAG_IS_DEFAULT(InlineSmallCode)) { + FLAG_SET_DEFAULT(InlineSmallCode, 2500); + } +#endif + + + set_start_time(os::javaTimeMillis()); +} + +// update_rate() is called from select_task() while holding a compile queue lock, and also from submit_compile(). +void AdvancedThresholdPolicy::update_rate(jlong t, methodOop m) { + if (is_old(m)) { + // We don't remove old methods from the queue, + // so we can just zero the rate. + m->set_rate(0); + return; + } + + // We don't update the rate if we've just come out of a safepoint. + // delta_s is the time since last safepoint in milliseconds. + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); + jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement (or since the policy start time if there was none) + // How many events were there since the last time? 
+ int event_count = m->invocation_count() + m->backedge_count(); + int delta_e = event_count - m->prev_event_count(); + + // We should have been running for at least TieredRateUpdateMinTime ms since the last safepoint. + if (delta_s >= TieredRateUpdateMinTime) { + // And we must've taken the previous point at least TieredRateUpdateMinTime ms before, with new events since. + if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) { + m->set_prev_time(t); + m->set_prev_event_count(event_count); + m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond + } else + if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) { + // If nothing happened for more than TieredRateUpdateMaxTime ms, zero the rate. Don't modify prev values. + m->set_rate(0); + } + } +} + +// Check if this method has been stale for a given number of milliseconds, i.e. it saw no events for more than timeout ms measured both from its last rate sample and from the last safepoint. +// See select_task(). +bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, methodOop m) { + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); + jlong delta_t = t - m->prev_time(); + if (delta_t > timeout && delta_s > timeout) { + int event_count = m->invocation_count() + m->backedge_count(); + int delta_e = event_count - m->prev_event_count(); + // Return true if there were no events since the previous sample. + return delta_e == 0; + } + return false; +} + +// We don't remove old methods from the compile queue even if they have +// very low activity. See select_task(). 
+bool AdvancedThresholdPolicy::is_old(methodOop method) { + return method->invocation_count() > 50000 || method->backedge_count() > 500000; +} + +// Scheduling weight of a method; computed in double because the int product of the two counters can exceed INT_MAX. +double AdvancedThresholdPolicy::weight(methodOop method) { + return (double)(method->rate() + 1) * (method->invocation_count() + 1) * (method->backedge_count() + 1); +} + +// Apply heuristics and return true if x should be compiled before y +bool AdvancedThresholdPolicy::compare_methods(methodOop x, methodOop y) { + if (x->highest_comp_level() > y->highest_comp_level()) { + // recompilation after deopt + return true; + } else + if (x->highest_comp_level() == y->highest_comp_level()) { + if (weight(x) > weight(y)) { + return true; + } + } + return false; +} + +// Is method profiled enough? +bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) { + methodDataOop mdo = method->method_data(); + if (mdo != NULL) { + int i = mdo->invocation_count_delta(); + int b = mdo->backedge_count_delta(); + return call_predicate_helper(i, b, 1); + } + return false; +} + +// Called with the queue locked and with at least one element +CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) { + CompileTask *max_task = NULL; + methodOop max_method = NULL; + jlong t = os::javaTimeMillis(); + // Iterate through the queue and find a method with a maximum rate. + for (CompileTask* task = compile_queue->first(); task != NULL;) { + CompileTask* next_task = task->next(); + methodOop method = (methodOop)JNIHandles::resolve(task->method_handle()); + // Refresh the rate first: the staleness check and the weight comparison below both read it. + update_rate(t, method); + if (max_task == NULL) { + max_task = task; + max_method = method; + } else { + // If a method has been stale for some time, remove it from the queue. 
+ if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { + if (PrintTieredEvents) { + print_event(KILL, method, method, task->osr_bci(), (CompLevel)task->comp_level()); + } + CompileTaskWrapper ctw(task); // Frees the task + compile_queue->remove(task); + method->clear_queued_for_compilation(); + task = next_task; + continue; + } + + // Select a method with a higher priority (see compare_methods()) + if (compare_methods(method, max_method)) { + max_task = task; + max_method = method; + } + } + task = next_task; + } + + if (max_task->comp_level() == CompLevel_full_profile && is_method_profiled(max_method)) { + max_task->set_comp_level(CompLevel_limited_profile); // Already profiled enough, so full-profile code is unnecessary + if (PrintTieredEvents) { + print_event(UPDATE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level()); + } + } + + return max_task; +} + +double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) { + double queue_size = CompileBroker::queue_size(level); + int comp_count = compiler_count(level); + double k = queue_size / (feedback_k * comp_count) + 1; // Grows by one for every (feedback_k * comp_count) queued methods + return k; +} + +// Call and loop predicates determine whether a transition to a higher +// compilation level should be performed (pointers to predicate functions +// are passed to common()). +// Tier?LoadFeedback is basically a coefficient that determines +// how many methods per compiler thread can be in the queue before +// the threshold values double. 
+bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level) { + switch(cur_level) { + case CompLevel_none: + case CompLevel_limited_profile: { + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); // scale by the load on the full-profile (level 3) queue + return loop_predicate_helper(i, b, k); + } + case CompLevel_full_profile: { + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); // scale by the load on the full-optimization (level 4) queue + return loop_predicate_helper(i, b, k); + } + default: + return true; + } +} + +bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level) { + switch(cur_level) { + case CompLevel_none: + case CompLevel_limited_profile: { + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); // scale by the load on the full-profile (level 3) queue + return call_predicate_helper(i, b, k); + } + case CompLevel_full_profile: { + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); // scale by the load on the full-optimization (level 4) queue + return call_predicate_helper(i, b, k); + } + default: + return true; + } +} + +// If a method is old enough and is still in the interpreter we would want to +// start profiling without waiting for the compiled method to arrive. +// We also take the load on compilers into account: profiling only starts while the C2 queue holds at most Tier3DelayOn methods per C2 compiler. +bool AdvancedThresholdPolicy::should_create_mdo(methodOop method, CompLevel cur_level) { + if (cur_level == CompLevel_none && + CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { + int i = method->invocation_count(); + int b = method->backedge_count(); + double k = Tier0ProfilingStartPercentage / 100.0; // the percentage is used as the threshold scale + return call_predicate_helper(i, b, k) || loop_predicate_helper(i, b, k); + } + return false; +} + +// Create MDO if necessary. 
+void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) { + if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return; + if (mh->method_data() == NULL) { + methodOopDesc::build_interpreter_method_data(mh, THREAD); + if (HAS_PENDING_EXCEPTION) { + CLEAR_PENDING_EXCEPTION; // MDO creation is best-effort: a failure is dropped rather than propagated + } + } +} + + +/* + * Method states: + * 0 - interpreter (CompLevel_none) + * 1 - pure C1 (CompLevel_simple) + * 2 - C1 with invocation and backedge counting (CompLevel_limited_profile) + * 3 - C1 with full profiling (CompLevel_full_profile) + * 4 - C2 (CompLevel_full_optimization) + * + * Common state transition patterns: + * a. 0 -> 3 -> 4. + * The most common path. But note that even in this straightforward case + * profiling can start at level 0 and finish at level 3. + * + * b. 0 -> 2 -> 3 -> 4. + * This case occurs when the load on C2 is deemed too high. So, instead of transitioning + * into state 3 directly and over-profiling while a method is in the C2 queue we transition to + * level 2 and wait until the load on C2 decreases. This path is disabled for OSRs. + * + * c. 0 -> (3->2) -> 4. + * In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough + * to enable the profiling to fully occur at level 0. In this case we change the compilation level + * of the method to 2, because it'll allow it to run much faster without full profiling while c2 + * is compiling. + * + * d. 0 -> 3 -> 1 or 0 -> 2 -> 1. + * After a method was once compiled with C1 it can be identified as trivial and be compiled to + * level 1. These transitions can also occur if a method can't be compiled with C2 but can with C1. + * + * e. 0 -> 4. + * This can happen if a method fails C1 compilation (it will still be profiled in the interpreter) + * or because of a deopt that didn't require reprofiling (compilation won't happen in this case because + * the compiled version already exists). 
+ * + * Note that since state 0 can be reached from any other state via deoptimization, different loops + * are possible. + * + */ + +// Common transition function. Given a predicate, determines whether a method should transition to another level. +CompLevel AdvancedThresholdPolicy::common(Predicate p, methodOop method, CompLevel cur_level) { + if (is_trivial(method)) return CompLevel_simple; + + CompLevel next_level = cur_level; + int i = method->invocation_count(); + int b = method->backedge_count(); + + switch(cur_level) { + case CompLevel_none: + // If we were at full profile level, would we switch to full opt? + if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) { + next_level = CompLevel_full_optimization; + } else if ((this->*p)(i, b, cur_level)) { + // C1-generated fully profiled code is about 30% slower than the limited profile + // code that has only invocation and backedge counters. The observation is that + // if the C2 queue is large enough we can spend too much time in the fully profiled code + // while waiting for C2 to pick the method from the queue. To alleviate this problem + // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long + // we choose to compile a limited profiled version and then recompile with full profiling + // when the load on C2 goes down. + if (CompileBroker::queue_size(CompLevel_full_optimization) > + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { + next_level = CompLevel_limited_profile; + } else { + next_level = CompLevel_full_profile; + } + } + break; + case CompLevel_limited_profile: + if (is_method_profiled(method)) { + // Special case: we got here because this method was fully profiled in the interpreter. 
+ next_level = CompLevel_full_optimization; + } else { + methodDataOop mdo = method->method_data(); + if (mdo != NULL) { + if (mdo->would_profile()) { + if (CompileBroker::queue_size(CompLevel_full_optimization) <= + Tier3DelayOff * compiler_count(CompLevel_full_optimization) && + (this->*p)(i, b, cur_level)) { + next_level = CompLevel_full_profile; + } + } else { + next_level = CompLevel_full_optimization; + } + } + } + break; + case CompLevel_full_profile: + { + methodDataOop mdo = method->method_data(); + if (mdo != NULL) { + if (mdo->would_profile()) { + int mdo_i = mdo->invocation_count_delta(); // at this level the counts come from the MDO, not the methodOop + int mdo_b = mdo->backedge_count_delta(); + if ((this->*p)(mdo_i, mdo_b, cur_level)) { + next_level = CompLevel_full_optimization; + } + } else { + next_level = CompLevel_full_optimization; + } + } + } + break; + } + return next_level; +} + +// Determine if a method should be compiled with a normal entry point at a different level. +CompLevel AdvancedThresholdPolicy::call_event(methodOop method, CompLevel cur_level) { + CompLevel osr_level = (CompLevel) method->highest_osr_comp_level(); + CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level); + + // If the OSR method level is greater than the regular method level, the levels should be + // equalized by raising the regular method level in order to avoid OSRs during each + // invocation of the method. + if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) { + methodDataOop mdo = method->method_data(); + guarantee(mdo != NULL, "MDO should not be NULL"); + if (mdo->invocation_count() >= 1) { + next_level = CompLevel_full_optimization; + } + } else { + next_level = MAX2(osr_level, next_level); + } + + return next_level; +} + +// Determine if we should do an OSR compilation of a given method. 
+CompLevel AdvancedThresholdPolicy::loop_event(methodOop method, CompLevel cur_level) { + if (cur_level == CompLevel_none) { + // If there is a live OSR method that means that we deopted to the interpreter + // for the transition. + CompLevel osr_level = (CompLevel)method->highest_osr_comp_level(); + if (osr_level > CompLevel_none) { + return osr_level; + } + } + return common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level); +} + +// Update the rate and submit the compile request. The hot count passed along is the invocation count for a normal entry and the backedge count otherwise. +void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) { + int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count(); + update_rate(os::javaTimeMillis(), mh()); + CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD); +} + + +// Handle the invocation event. +void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh, + CompLevel level, TRAPS) { + if (should_create_mdo(mh(), level)) { + create_mdo(mh, THREAD); + } + if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) { + CompLevel next_level = call_event(mh(), level); + if (next_level != level) { + compile(mh, InvocationEntryBci, next_level, THREAD); + } + } +} + +// Handle the back branch event. Notice that we can compile the method +// with a regular entry from here. +void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh, + int bci, CompLevel level, TRAPS) { + if (should_create_mdo(mh(), level)) { + create_mdo(mh, THREAD); + } + + // If the method is already compiling, quickly bail out. + if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) { + // Use loop event as an opportunity to also check whether there have been + // enough calls. 
+ CompLevel cur_level = comp_level(mh()); + CompLevel next_level = call_event(mh(), cur_level); + CompLevel next_osr_level = loop_event(mh(), level); + if (next_osr_level == CompLevel_limited_profile) { + next_osr_level = CompLevel_full_profile; // OSRs are supposed to be for very hot methods. + } + next_level = MAX2(next_level, + next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level); + bool is_compiling = false; + if (next_level != cur_level) { + compile(mh, InvocationEntryBci, next_level, THREAD); + is_compiling = true; + } + + // Do the OSR version, unless a normal-entry compile was just requested above + if (!is_compiling && next_osr_level != level) { + compile(mh, bci, next_osr_level, THREAD); + } + } +} + +#endif // TIERED --- /dev/null 2011-03-03 14:50:42.000000000 -0800 +++ new/src/share/vm/runtime/advancedThresholdPolicy.hpp 2011-03-03 14:50:42.726666000 -0800 @@ -0,0 +1,207 @@ +/* +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. +*/ + +#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP +#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP + +#include "runtime/simpleThresholdPolicy.hpp" + +#ifdef TIERED +class CompileTask; +class CompileQueue; + +/* + * The system supports 5 execution levels: + * * level 0 - interpreter + * * level 1 - C1 with full optimization (no profiling) + * * level 2 - C1 with invocation and backedge counters + * * level 3 - C1 with full profiling (level 2 + MDO) + * * level 4 - C2 + * + * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters + * (invocation counters and backedge counters). The frequency of these notifications is + * different at each level. These notifications are used by the policy to decide what transition + * to make. + * + * Execution starts at level 0 (interpreter), then the policy can decide either to compile the + * method at level 3 or level 2. The decision is based on the following factors: + * 1. 
The length of the C2 queue determines the next level. The observation is that level 2 + * is generally faster than level 3 by about 30%, therefore we would want to minimize the time + * a method spends at level 3. We should only spend the time at level 3 that is necessary to get + * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to + * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile + * request makes its way through the long queue. When the load on C2 recedes we are going to + * recompile at level 3 and start gathering profiling information. + * 2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce + * additional filtering if the compiler is overloaded. The rationale is that by the time a + * method gets compiled it can become unused, so it doesn't make sense to put too much onto the + * queue. + * + * After profiling is completed at level 3 the transition is made to level 4. Again, the length + * of the C2 queue is used as a feedback to adjust the thresholds. + * + * After the first C1 compile some basic information is determined about the code like the number + * of the blocks and the number of the loops. Based on that it can be decided that a method + * is trivial and compiling it with C1 will yield the same code. In this case the method is + * compiled at level 1 instead of 4. + * + * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of + * the code and the C2 queue is sufficiently small we can decide to start profiling in the + * interpreter (and continue profiling in the compiled code once the level 3 version arrives). + * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2 + * version is compiled instead in order to run faster waiting for a level 4 version. 
+ * + * Compile queues are implemented as priority queues - for each method in the queue we compute + * the event rate (the number of invocation and backedge counter increments per unit of time). + * When getting an element off the queue we pick the one with the largest rate. Maintaining the + * rate also allows us to remove stale methods (the ones that got on the queue but stopped + * being used shortly after that). +*/ + +/* Command line options: + * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method + * invocation and backedge notifications. Basically, on every n-th invocation or backedge a mutator thread + * makes a call into the runtime. + * + * - Tier?InvocationThreshold, Tier?MinInvocationThreshold, Tier?CompileThreshold and Tier?BackEdgeThreshold control + * compilation thresholds. + * Level 2 thresholds are not used and are provided for option-compatibility and potential future use. + * Other thresholds work as follows: + * + * Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when + * the following predicate is true (X is the level): + * + * i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s && i + b > TierXCompileThreshold * s), + * + * where $i$ is the number of method invocations, $b$ is the number of backedges and $s$ is the scaling + * coefficient that will be discussed further. + * The intuition is to equalize the time that is spent profiling each method. + * The same predicate is used to control the transition from level 3 to level 4 (C2). It should be + * noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come + * from methodOop and for the 3->4 transition they come from MDO (since profiled invocations are + * counted separately). + * + * OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates. + * + * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending + * on the compiler load. 
The scaling coefficients are computed as follows: + * + * s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1, + * + * where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X + * is the number of level X compiler threads. + * + * Basically these parameters describe how many methods should be in the compile queue + * per compiler thread before the scaling coefficient increases by one. + * + * This feedback provides the mechanism to automatically control the flow of compilation requests + * depending on the machine speed, mutator load and other external factors. + * + * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop. + * Consider the following observation: a method compiled with full profiling (level 3) + * is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO). + * Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue + * gets congested and the 3->4 transition is delayed. While the method is in the C2 queue it continues + * executing at level 3 for a much longer time than is required by the predicate and at suboptimal speed. + * The idea is to dynamically change the behavior of the system in such a way that if a substantial + * load on C2 is detected we would first do the 0->2 transition allowing a method to run faster. + * Then, when the load decreases, we allow 2->3 transitions. + * + * Tier3Delay* parameters control this switching mechanism. + * Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy + * no longer does 0->3 transitions but does 0->2 transitions instead. + * Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue + * per compiler thread falls below the specified amount. + * The hysteresis is necessary to avoid jitter. 
+ * + * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue. + * Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to + * compile from the compile queue, we also can detect stale methods for which the rate has been + * 0 for some time in the same iteration. Stale methods can appear in the queue when an application + * abruptly changes its behavior. + * + * - TieredStopAtLevel is used mostly for testing. It allows bypassing the policy logic and sticking + * to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything + * with pure c1. + * + * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the + * 0->3 predicate are already exceeded by the given percentage but the level 3 version of the + * method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled + * version in time. This shortens the overall transition to level 4 and decreases the startup time. + * Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long + * there is no reason to start profiling prematurely. + * + * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation. + * Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered + * to be zero if no events occurred in TieredRateUpdateMaxTime. + */ + + +class AdvancedThresholdPolicy : public SimpleThresholdPolicy { + jlong _start_time; // Set in initialize(); baseline for a method's first rate sample (see update_rate()) + + // Call and loop predicates determine whether a transition to a higher compilation + // level should be performed (pointers to predicate functions are passed to common()). + // Predicates also take compiler load into account. 
+ typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level); + bool call_predicate(int i, int b, CompLevel cur_level); + bool loop_predicate(int i, int b, CompLevel cur_level); + // Common transition function. Given a predicate, determines whether a method should transition to another level. + CompLevel common(Predicate p, methodOop method, CompLevel cur_level); + // Transition functions. + // call_event determines if a method should be compiled at a different + // level with a regular invocation entry. + CompLevel call_event(methodOop method, CompLevel cur_level); + // loop_event checks if a method should be OSR compiled at a different + // level. + CompLevel loop_event(methodOop method, CompLevel cur_level); + // Has a method been around for a long time? + // We don't remove old methods from the compile queue even if they have + // very low activity (see select_task()). + inline bool is_old(methodOop method); + // Was a given method inactive for a given number of milliseconds? + // If it was, we would remove it from the queue (see select_task()). + inline bool is_stale(jlong t, jlong timeout, methodOop m); + // Compute the weight of the method for the compilation scheduling + inline double weight(methodOop method); + // Apply heuristics and return true if x should be compiled before y + inline bool compare_methods(methodOop x, methodOop y); + // Compute event rate for a given method. The rate is the number of events (invocations + backedges) + // per millisecond. + inline void update_rate(jlong t, methodOop m); + // Compute threshold scaling coefficient + inline double threshold_scale(CompLevel level, int feedback_k); + // If a method is old enough and is still in the interpreter we would want to + // start profiling without waiting for the compiled method to arrive. This function + // determines whether we should do that. + inline bool should_create_mdo(methodOop method, CompLevel cur_level); + // Create MDO if necessary. 
+ void create_mdo(methodHandle mh, TRAPS); + // Is the method profiled enough? + bool is_method_profiled(methodOop method); + +protected: + void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level); // Prints the method's rate and the current threshold scales + + void set_start_time(jlong t) { _start_time = t; } + jlong start_time() const { return _start_time; } + + // Submit a given method for compilation (and update the rate). + virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS); + // event() from SimpleThresholdPolicy would call these. + virtual void method_invocation_event(methodHandle method, methodHandle inlinee, + CompLevel level, TRAPS); + virtual void method_back_branch_event(methodHandle method, methodHandle inlinee, + int bci, CompLevel level, TRAPS); +public: + AdvancedThresholdPolicy() : _start_time(0) { } + // Select task is called by CompileBroker. We should return a task or NULL. + virtual CompileTask* select_task(CompileQueue* compile_queue); + virtual void initialize(); +}; + +#endif // TIERED + +#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP