< prev index next >
src/share/vm/opto/lcm.cpp
Print this page
*** 29,38 ****
--- 29,39 ----
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/runtime.hpp"
+ #include "opto/chaitin.hpp"
#include "runtime/sharedRuntime.hpp"
// Optimization - Graph Style
// Check whether val is not-null-decoded compressed oop,
*** 441,451 ****
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
! Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
// If only a single entry on the stack, use it
uint cnt = worklist.size();
if (cnt == 1) {
Node *n = worklist[0];
--- 442,458 ----
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
! Node* PhaseCFG::select(
! Block* block,
! Node_List &worklist,
! GrowableArray<int> &ready_cnt,
! VectorSet &next_call,
! uint sched_slot,
! intptr_t* recalc_pressure_nodes) {
// If only a single entry on the stack, use it
uint cnt = worklist.size();
if (cnt == 1) {
Node *n = worklist[0];
*** 456,465 ****
--- 463,473 ----
uint choice = 0; // Bigger is most important
uint latency = 0; // Bigger is scheduled first
uint score = 0; // Bigger is better
int idx = -1; // Index in worklist
int cand_cnt = 0; // Candidate count
+ bool block_size_threshold_ok = (block->number_of_nodes() > 10) ? true : false;
for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
// Order in worklist is used to break ties.
// See caller for how this is used to delay scheduling
// of induction variable increments to after the other
*** 537,546 ****
--- 545,592 ----
}
uint n_latency = get_latency_for_node(n);
uint n_score = n->req(); // Many inputs get high score to break ties
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ if (recalc_pressure_nodes[n->_idx] == 0x7fff7fff) {
+ _regalloc->_scratch_int_pressure.init(_regalloc->_sched_int_pressure.high_pressure_limit());
+ _regalloc->_scratch_float_pressure.init(_regalloc->_sched_float_pressure.high_pressure_limit());
+ // simulate the notion that we just picked this node to schedule
+ n->add_flag(Node::Flag_is_scheduled);
+ // now caculate its effect upon the graph if we did
+ adjust_register_pressure(n, block, recalc_pressure_nodes, false);
+ // return its state for finalize in case somebody else wins
+ n->remove_flag(Node::Flag_is_scheduled);
+ // now save the two final pressure components of register pressure, limiting pressure calcs to short size
+ short int_pressure = (short)_regalloc->_scratch_int_pressure.current_pressure();
+ short float_pressure = (short)_regalloc->_scratch_float_pressure.current_pressure();
+ recalc_pressure_nodes[n->_idx] = int_pressure;
+ recalc_pressure_nodes[n->_idx] |= (float_pressure << 16);
+ }
+
+ if (_scheduling_for_pressure) {
+ latency = n_latency;
+ if (n_choice != 3) {
+ // Now evaluate each register pressure component based on threshold in the score.
+ // In general the defining register type will dominate the score, ergo we will not see register pressure grow on both banks
+ // on a single instruction, but we might see it shrink on both banks.
+ if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
+ short int_pressure = (short)recalc_pressure_nodes[n->_idx];
+ n_score = (int_pressure < 0) ? ((score + n_score) - int_pressure) : (int_pressure > 0) ? 1 : n_score;
+ }
+ if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
+ short float_pressure = (short)(recalc_pressure_nodes[n->_idx] >> 16);
+ n_score = (float_pressure < 0) ? ((score + n_score) - float_pressure) : (float_pressure > 0) ? 1 : n_score;
+ }
+ } else {
+ // make sure we choose these candidates
+ score = 0;
+ }
+ }
+ }
+
// Keep best latency found
cand_cnt++;
if (choice < n_choice ||
(choice == n_choice &&
((StressLCM && Compile::randomized_select(cand_cnt)) ||
*** 560,569 ****
--- 606,709 ----
worklist.map((uint)idx, worklist.pop()); // Compress worklist
return n;
}
+ //-------------------------adjust_register_pressure----------------------------
+ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_pressure_nodes, bool finalize_mode) {
+ PhaseLive* liveinfo = _regalloc->get_live();
+ IndexSet* liveout = liveinfo->live(block);
+ // first adjust the register pressure for the sources
+ for (uint i = 1; i < n->req(); i++) {
+ bool lrg_ends = false;
+ Node *src_n = n->in(i);
+ if (src_n == NULL) continue;
+ if (!src_n->is_Mach()) continue;
+ uint src = _regalloc->_lrg_map.find(src_n);
+ if (src == 0) continue;
+ LRG& lrg_src = _regalloc->lrgs(src);
+ // detect if the live range ends or not
+ if (liveout->member(src) == false) {
+ lrg_ends = true;
+ for (DUIterator_Fast jmax, j = src_n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = src_n->fast_out(j); // Get user
+ if (m == n) continue;
+ if (!m->is_Mach()) continue;
+ MachNode *mach = m->as_Mach();
+ bool src_matches = false;
+ int iop = mach->ideal_Opcode();
+
+ switch (iop) {
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StoreD:
+ case Op_StoreF:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreP:
+ case Op_StoreN:
+ case Op_StoreVector:
+ case Op_StoreNKlass:
+ for (uint k = 1; k < m->req(); k++) {
+ Node *in = m->in(k);
+ if (in == src_n) {
+ src_matches = true;
+ break;
+ }
+ }
+ break;
+
+ default:
+ src_matches = true;
+ break;
+ }
+
+ // If we have a store as our use, ignore the non source operands
+ if (src_matches == false) continue;
+
+ // Mark every unscheduled use which is not n with a recalculation
+ if ((get_block_for_node(m) == block) && (!m->is_scheduled())) {
+ if (finalize_mode && !m->is_Phi()) {
+ recalc_pressure_nodes[m->_idx] = 0x7fff7fff;
+ }
+ lrg_ends = false;
+ }
+ }
+ }
+ // if none, this live range ends and we can adjust register pressure
+ if (lrg_ends) {
+ if (finalize_mode) {
+ _regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
+ } else {
+ _regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
+ }
+ }
+ }
+
+ // now add the register pressure from the dest and evaluate which heuristic we should use:
+ // 1.) The default, latency scheduling
+ // 2.) Register pressure scheduling based on the high pressure limit threshold for int or float register stacks
+ uint dst = _regalloc->_lrg_map.find(n);
+ if (dst != 0) {
+ LRG& lrg_dst = _regalloc->lrgs(dst);
+ if (finalize_mode) {
+ _regalloc->raise_pressure(block, lrg_dst, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
+ // check to see if we fall over the register pressure cliff here
+ if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
+ _scheduling_for_pressure = true;
+ } else if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
+ _scheduling_for_pressure = true;
+ } else {
+ // restore latency scheduling mode
+ _scheduling_for_pressure = false;
+ }
+ } else {
+ _regalloc->raise_pressure(block, lrg_dst, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
+ }
+ }
+ }
//------------------------------set_next_call----------------------------------
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
if( next_call.test_set(n->_idx) ) return;
for( uint i=0; i<n->len(); i++ ) {
*** 642,652 ****
Node* m = n->fast_out(j); // Get user
if(get_block_for_node(m) != block) {
continue;
}
if( m->is_Phi() ) continue;
! int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
--- 782,792 ----
Node* m = n->fast_out(j); // Get user
if(get_block_for_node(m) != block) {
continue;
}
if( m->is_Phi() ) continue;
! int m_cnt = ready_cnt.at(m->_idx) - 1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
*** 709,719 ****
}
//------------------------------schedule_local---------------------------------
// Topological sort within a block. Someday become a real scheduler.
! bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
// Already "sorted" are the block start Node (as the first entry), and
// the block-ending Node and any trailing control projections. We leave
// these alone. PhiNodes and ParmNodes are made to follow the block start
// Node. Everything else gets topo-sorted.
--- 849,859 ----
}
//------------------------------schedule_local---------------------------------
// Topological sort within a block. Someday become a real scheduler.
! bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call, intptr_t *recalc_pressure_nodes) {
// Already "sorted" are the block start Node (as the first entry), and
// the block-ending Node and any trailing control projections. We leave
// these alone. PhiNodes and ParmNodes are made to follow the block start
// Node. Everything else gets topo-sorted.
*** 731,751 ****
// RootNode is already sorted
if (block->number_of_nodes() == 1) {
return true;
}
// Move PhiNodes and ParmNodes from 1 to cnt up to the start
uint node_cnt = block->end_idx();
uint phi_cnt = 1;
- uint i;
for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
Node *n = block->get_node(i);
if( n->is_Phi() || // Found a PhiNode or ParmNode
(n->is_Proj() && n->in(0) == block->head()) ) {
// Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
block->map_node(block->get_node(phi_cnt), i);
block->map_node(n, phi_cnt++); // swap Phi/Parm up front
} else { // All others
// Count block-local inputs to 'n'
uint cnt = n->len(); // Input count
uint local = 0;
for( uint j=0; j<cnt; j++ ) {
--- 871,909 ----
// RootNode is already sorted
if (block->number_of_nodes() == 1) {
return true;
}
+ bool block_size_threshold_ok = (block->number_of_nodes() > 10) ? true : false;
+
+ // We track the uses of local definitions as input dependences so that
+ // we know when a given instruction is avialable to be scheduled.
+ uint i;
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ for (i = 1; i < block->number_of_nodes(); i++) { // setup nodes for pressure calc
+ Node *n = block->get_node(i);
+ n->remove_flag(Node::Flag_is_scheduled);
+ if (!n->is_Phi()) {
+ recalc_pressure_nodes[n->_idx] = 0x7fff7fff;
+ }
+ }
+ }
+
// Move PhiNodes and ParmNodes from 1 to cnt up to the start
uint node_cnt = block->end_idx();
uint phi_cnt = 1;
for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
Node *n = block->get_node(i);
if( n->is_Phi() || // Found a PhiNode or ParmNode
(n->is_Proj() && n->in(0) == block->head()) ) {
// Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
block->map_node(block->get_node(phi_cnt), i);
block->map_node(n, phi_cnt++); // swap Phi/Parm up front
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ // mark n as scheduled
+ n->add_flag(Node::Flag_is_scheduled);
+ }
} else { // All others
// Count block-local inputs to 'n'
uint cnt = n->len(); // Input count
uint local = 0;
for( uint j=0; j<cnt; j++ ) {
*** 789,804 ****
for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
ready_cnt.at_put(block->get_node(i2)->_idx, 0);
// All the prescheduled guys do not hold back internal nodes
uint i3;
! for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
Node *n = block->get_node(i3); // Get pre-scheduled
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* m = n->fast_out(j);
if (get_block_for_node(m) == block) { // Local-block user
int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
}
}
}
--- 947,968 ----
for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
ready_cnt.at_put(block->get_node(i2)->_idx, 0);
// All the prescheduled guys do not hold back internal nodes
uint i3;
! for (i3 = 0; i3 < phi_cnt; i3++) { // For all pre-scheduled
Node *n = block->get_node(i3); // Get pre-scheduled
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* m = n->fast_out(j);
if (get_block_for_node(m) == block) { // Local-block user
int m_cnt = ready_cnt.at(m->_idx)-1;
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ // mark m as scheduled
+ if (m_cnt < 0) {
+ m->add_flag(Node::Flag_is_scheduled);
+ }
+ }
ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
}
}
}
*** 825,834 ****
--- 989,1015 ----
while (delay.size()) {
Node* d = delay.pop();
worklist.push(d);
}
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ // To stage register pressure calculations we need to examine the live set variables
+ // breaking them up by register class to compartmentalize the calculations.
+ uint float_pressure = FLOATPRESSURE;
+ #ifdef _LP64
+ if (UseAVX > 2) {
+ float_pressure *= 2;
+ }
+ #endif
+ _regalloc->_sched_int_pressure.init(INTPRESSURE);
+ _regalloc->_sched_float_pressure.init(float_pressure);
+ _regalloc->_scratch_int_pressure.init(INTPRESSURE);
+ _regalloc->_scratch_float_pressure.init(float_pressure);
+
+ _regalloc->compute_entry_block_pressure(block);
+ }
+
// Warm up the 'next_call' heuristic bits
needed_for_next_call(block, block->head(), next_call);
#ifndef PRODUCT
if (trace_opto_pipelining()) {
*** 856,868 ****
tty->cr();
}
#endif
// Select and pop a ready guy from worklist
! Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
block->map_node(n, phi_cnt++); // Schedule him next
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# select %d: %s", n->_idx, n->Name());
tty->print(", latency:%d", get_latency_for_node(n));
n->dump();
--- 1037,1058 ----
tty->cr();
}
#endif
// Select and pop a ready guy from worklist
! Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt, recalc_pressure_nodes);
block->map_node(n, phi_cnt++); // Schedule him next
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ n->add_flag(Node::Flag_is_scheduled);
+
+ // Now adjust the resister pressure with the node we selected
+ if (!n->is_Phi()) {
+ adjust_register_pressure(n, block, recalc_pressure_nodes, true);
+ }
+ }
+
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# select %d: %s", n->_idx, n->Name());
tty->print(", latency:%d", get_latency_for_node(n));
n->dump();
*** 904,914 ****
if( m->is_Phi() ) continue;
if (m->_idx >= max_idx) { // new node, skip it
assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
continue;
}
! int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
}
--- 1094,1104 ----
if( m->is_Phi() ) continue;
if (m->_idx >= max_idx) { // new node, skip it
assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
continue;
}
! int m_cnt = ready_cnt.at(m->_idx) - 1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
}
*** 923,945 ****
}
// assert( phi_cnt == end_idx(), "did not schedule all" );
return false;
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("#");
tty->print_cr("# after schedule_local");
for (uint i = 0;i < block->number_of_nodes();i++) {
tty->print("# ");
block->get_node(i)->fast_dump();
}
tty->cr();
}
#endif
-
return true;
}
//--------------------------catch_cleanup_fix_all_inputs-----------------------
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
--- 1113,1147 ----
}
// assert( phi_cnt == end_idx(), "did not schedule all" );
return false;
}
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ _regalloc->compute_exit_block_pressure(block);
+ block->_reg_pressure = _regalloc->_sched_int_pressure.final_pressure();
+ block->_freg_pressure = _regalloc->_sched_float_pressure.final_pressure();
+ }
+
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("#");
tty->print_cr("# after schedule_local");
for (uint i = 0;i < block->number_of_nodes();i++) {
tty->print("# ");
block->get_node(i)->fast_dump();
}
+ tty->print_cr("# ");
+
+ if (OptoRegScheduling && block_size_threshold_ok) {
+ tty->print_cr("# pressure info : %d", block->_pre_order);
+ _regalloc->print_pressure_info(_regalloc->_sched_int_pressure, "int register info");
+ _regalloc->print_pressure_info(_regalloc->_sched_float_pressure, "float register info");
+ }
tty->cr();
}
#endif
return true;
}
//--------------------------catch_cleanup_fix_all_inputs-----------------------
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
< prev index next >