
src/share/vm/opto/lcm.cpp

*** 29,38 ****
--- 29,39 ----
  #include "opto/c2compiler.hpp"
  #include "opto/callnode.hpp"
  #include "opto/cfgnode.hpp"
  #include "opto/machnode.hpp"
  #include "opto/runtime.hpp"
+ #include "opto/chaitin.hpp"
  #include "runtime/sharedRuntime.hpp"
  
  // Optimization - Graph Style
  
  // Check whether val is not-null-decoded compressed oop,
*** 441,451 ****
  // These are chosen immediately.  Some instructions are required to immediately
  // precede the last instruction in the block, and these are taken last.  Of the
  // remaining cases (most), choose the instruction with the greatest latency
  // (that is, the most number of pseudo-cycles required to the end of the
  // routine). If there is a tie, choose the instruction with the most inputs.
! Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
  
    // If only a single entry on the stack, use it
    uint cnt = worklist.size();
    if (cnt == 1) {
      Node *n = worklist[0];
--- 442,458 ----
  // These are chosen immediately.  Some instructions are required to immediately
  // precede the last instruction in the block, and these are taken last.  Of the
  // remaining cases (most), choose the instruction with the greatest latency
  // (that is, the most number of pseudo-cycles required to the end of the
  // routine). If there is a tie, choose the instruction with the most inputs.
! Node* PhaseCFG::select(
!   Block* block,
!   Node_List &worklist,
!   GrowableArray<int> &ready_cnt,
!   VectorSet &next_call,
!   uint sched_slot,
!   intptr_t* recalc_pressure_nodes) {
  
    // If only a single entry on the stack, use it
    uint cnt = worklist.size();
    if (cnt == 1) {
      Node *n = worklist[0];
*** 456,465 ****
--- 463,473 ----
    uint choice  = 0; // Bigger is most important
    uint latency = 0; // Bigger is scheduled first
    uint score   = 0; // Bigger is better
    int idx = -1;     // Index in worklist
    int cand_cnt = 0; // Candidate count
+   bool block_size_threshold_ok = (block->number_of_nodes() > 10);
  
    for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
      // Order in worklist is used to break ties.
      // See caller for how this is used to delay scheduling
      // of induction variable increments to after the other
*** 537,546 ****
--- 545,592 ----
      }
  
      uint n_latency = get_latency_for_node(n);
      uint n_score = n->req();   // Many inputs get high score to break ties
  
+     if (OptoRegScheduling && block_size_threshold_ok) {
+       if (recalc_pressure_nodes[n->_idx] == 0x7fff7fff) {
+         _regalloc->_scratch_int_pressure.init(_regalloc->_sched_int_pressure.high_pressure_limit());
+         _regalloc->_scratch_float_pressure.init(_regalloc->_sched_float_pressure.high_pressure_limit());
+         // simulate the notion that we just picked this node to schedule
+         n->add_flag(Node::Flag_is_scheduled);
+         // now calculate its effect upon the graph if we did
+         adjust_register_pressure(n, block, recalc_pressure_nodes, false);
+         // restore its state for finalize in case somebody else wins
+         n->remove_flag(Node::Flag_is_scheduled);
+         // now cache the two final register pressure components, each truncated to a short
+         short int_pressure = (short)_regalloc->_scratch_int_pressure.current_pressure();
+         short float_pressure = (short)_regalloc->_scratch_float_pressure.current_pressure();
+         recalc_pressure_nodes[n->_idx] = int_pressure;
+         recalc_pressure_nodes[n->_idx] |= (float_pressure << 16);
+       }
+ 
+       if (_scheduling_for_pressure) {
+         latency = n_latency;
+         if (n_choice != 3) {
+           // Now fold each register pressure component into the score, based on its threshold.
+           // In general the defining register type will dominate the score, ergo we will not see
+           // register pressure grow on both banks on a single instruction, but we might see it shrink on both banks.
+           if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
+             short int_pressure = (short)recalc_pressure_nodes[n->_idx];
+             n_score = (int_pressure < 0) ? ((score + n_score) - int_pressure) : (int_pressure > 0) ? 1 : n_score;
+           }
+           if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
+             short float_pressure = (short)(recalc_pressure_nodes[n->_idx] >> 16);
+             n_score = (float_pressure < 0) ? ((score + n_score) - float_pressure) : (float_pressure > 0) ? 1 : n_score;
+           }
+         } else {
+           // make sure we choose these candidates
+           score = 0;
+         }
+       }
+     }
+ 
      // Keep best latency found
      cand_cnt++;
      if (choice < n_choice ||
          (choice == n_choice &&
           ((StressLCM && Compile::randomized_select(cand_cnt)) ||
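The recalc_pressure_nodes cache above packs both banks' trial results into one word per node. A minimal standalone sketch of that encoding (plain C++, none of the HotSpot types; kNotComputed mirrors the 0x7fff7fff sentinel, which real pressures stay far below):

    #include <cstdint>
    #include <cstdio>

    typedef std::intptr_t cache_t;

    // Sentinel meaning "pressure contribution not yet computed for this node".
    static const cache_t kNotComputed = 0x7fff7fff;

    static cache_t pack(short int_pressure, short float_pressure) {
      // Low 16 bits: int-bank pressure; high 16 bits: float-bank pressure.
      // A negative int_pressure sign-extends into the high half before the
      // OR, so the decode below only round-trips cleanly for a non-negative
      // int component.
      cache_t v = int_pressure;
      v |= ((cache_t)float_pressure << 16);
      return v;
    }

    static short unpack_int(cache_t v)   { return (short)v; }
    static short unpack_float(cache_t v) { return (short)(v >> 16); }

    int main() {
      cache_t v = pack(12, 3);
      printf("int=%d float=%d sentinel=%d\n",
             unpack_int(v), unpack_float(v), (int)(v == kNotComputed));
      return 0;  // prints: int=12 float=3 sentinel=0
    }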
*** 560,569 ****
--- 606,709 ----
    worklist.map((uint)idx, worklist.pop());     // Compress worklist
    return n;
  }
  
+ //-------------------------adjust_register_pressure----------------------------
+ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_pressure_nodes, bool finalize_mode) {
+   PhaseLive* liveinfo = _regalloc->get_live();
+   IndexSet* liveout = liveinfo->live(block);
+   // first adjust the register pressure for the sources
+   for (uint i = 1; i < n->req(); i++) {
+     bool lrg_ends = false;
+     Node *src_n = n->in(i);
+     if (src_n == NULL) continue;
+     if (!src_n->is_Mach()) continue;
+     uint src = _regalloc->_lrg_map.find(src_n);
+     if (src == 0) continue;
+     LRG& lrg_src = _regalloc->lrgs(src);
+     // detect if the live range ends or not
+     if (liveout->member(src) == false) {
+       lrg_ends = true;
+       for (DUIterator_Fast jmax, j = src_n->fast_outs(jmax); j < jmax; j++) {
+         Node* m = src_n->fast_out(j); // Get user
+         if (m == n) continue;
+         if (!m->is_Mach()) continue;
+         MachNode *mach = m->as_Mach();
+         bool src_matches = false;
+         int iop = mach->ideal_Opcode();
+ 
+         switch (iop) {
+         case Op_StoreB:
+         case Op_StoreC:
+         case Op_StoreCM:
+         case Op_StoreD:
+         case Op_StoreF:
+         case Op_StoreI:
+         case Op_StoreL:
+         case Op_StoreP:
+         case Op_StoreN:
+         case Op_StoreVector:
+         case Op_StoreNKlass:
+           for (uint k = 1; k < m->req(); k++) {
+             Node *in = m->in(k);
+             if (in == src_n) {
+               src_matches = true;
+               break;
+             }
+           }
+           break;
+ 
+         default:
+           src_matches = true;
+           break;
+         }
+ 
+         // If we have a store as our use, ignore the non-source operands
+         if (src_matches == false) continue;
+ 
+         // Mark every unscheduled use other than n for recalculation
+         if ((get_block_for_node(m) == block) && (!m->is_scheduled())) {
+           if (finalize_mode && !m->is_Phi()) {
+             recalc_pressure_nodes[m->_idx] = 0x7fff7fff;
+           }
+           lrg_ends = false;
+         }
+       }
+     }
+     // if no such use remains, this live range ends here and we can lower register pressure
+     if (lrg_ends) {
+       if (finalize_mode) {
+         _regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
+       } else {
+         _regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
+       }
+     }
+   }
+ 
+   // now add the register pressure from the dest and evaluate which heuristic we should use:
+   // 1.) The default, latency scheduling
+   // 2.) Register pressure scheduling based on the high pressure limit threshold for int or float register banks
+   uint dst = _regalloc->_lrg_map.find(n);
+   if (dst != 0) {
+     LRG& lrg_dst = _regalloc->lrgs(dst);
+     if (finalize_mode) {
+       _regalloc->raise_pressure(block, lrg_dst, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
+       // check to see if we fall over the register pressure cliff here
+       if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
+         _scheduling_for_pressure = true;
+       } else if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
+         _scheduling_for_pressure = true;
+       } else {
+         // restore latency scheduling mode
+         _scheduling_for_pressure = false;
+       }
+     } else {
+       _regalloc->raise_pressure(block, lrg_dst, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
+     }
+   }
+ }
  
  //------------------------------set_next_call----------------------------------
  void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
    if( next_call.test_set(n->_idx) ) return;
    for( uint i=0; i<n->len(); i++ ) {
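The tail of adjust_register_pressure() is effectively a mode switch with no hysteresis: pressure scheduling turns on the moment either bank crosses its high-pressure limit, and back off once both are under it again. A reduced sketch of just that decision, with stand-in types (none of these names are HotSpot's):

    #include <cstdio>

    // Stand-in for the patch's per-bank pressure tracker.
    struct BankPressure {
      int current;
      int high_limit;
      bool over_limit() const { return current > high_limit; }
    };

    // Mirrors the finalize-mode branch above: the new value of
    // _scheduling_for_pressure after a definition raises bank pressure.
    static bool scheduling_for_pressure(const BankPressure& int_bank,
                                        const BankPressure& float_bank) {
      return int_bank.over_limit() || float_bank.over_limit();
    }

    int main() {
      BankPressure ints = { 14, 12 }, floats = { 6, 14 };
      printf("%d\n", scheduling_for_pressure(ints, floats)); // 1: int bank over
      ints.current = 9;
      printf("%d\n", scheduling_for_pressure(ints, floats)); // 0: back to latency
      return 0;
    }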
*** 642,652 ****
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
!     int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
--- 782,792 ----
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
!     int m_cnt = ready_cnt.at(m->_idx) - 1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
*** 709,719 ****
  }
  
  //------------------------------schedule_local---------------------------------
  // Topological sort within a block.  Someday become a real scheduler.
! bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
    // Already "sorted" are the block start Node (as the first entry), and
    // the block-ending Node and any trailing control projections.  We leave
    // these alone.  PhiNodes and ParmNodes are made to follow the block start
    // Node.  Everything else gets topo-sorted.
--- 849,859 ----
  }
  
  //------------------------------schedule_local---------------------------------
  // Topological sort within a block.  Someday become a real scheduler.
! bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call, intptr_t *recalc_pressure_nodes) {
    // Already "sorted" are the block start Node (as the first entry), and
    // the block-ending Node and any trailing control projections.  We leave
    // these alone.  PhiNodes and ParmNodes are made to follow the block start
    // Node.  Everything else gets topo-sorted.
*** 731,751 ****
    // RootNode is already sorted
    if (block->number_of_nodes() == 1) {
      return true;
    }
  
    // Move PhiNodes and ParmNodes from 1 to cnt up to the start
    uint node_cnt = block->end_idx();
    uint phi_cnt = 1;
-   uint i;
    for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
      Node *n = block->get_node(i);
      if( n->is_Phi() ||         // Found a PhiNode or ParmNode
          (n->is_Proj()  && n->in(0) == block->head()) ) {
        // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
        block->map_node(block->get_node(phi_cnt), i);
        block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
      } else {                    // All others
        // Count block-local inputs to 'n'
        uint cnt = n->len();      // Input count
        uint local = 0;
        for( uint j=0; j<cnt; j++ ) {
--- 871,909 ----
    // RootNode is already sorted
    if (block->number_of_nodes() == 1) {
      return true;
    }
  
+   bool block_size_threshold_ok = (block->number_of_nodes() > 10);
+ 
+   // We track the uses of local definitions as input dependences so that
+   // we know when a given instruction is available to be scheduled.
+   uint i;
+   if (OptoRegScheduling && block_size_threshold_ok) {
+     for (i = 1; i < block->number_of_nodes(); i++) { // setup nodes for pressure calc
+       Node *n = block->get_node(i);
+       n->remove_flag(Node::Flag_is_scheduled);
+       if (!n->is_Phi()) {
+         recalc_pressure_nodes[n->_idx] = 0x7fff7fff;
+       }
+     }
+   }
+ 
    // Move PhiNodes and ParmNodes from 1 to cnt up to the start
    uint node_cnt = block->end_idx();
    uint phi_cnt = 1;
    for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
      Node *n = block->get_node(i);
      if( n->is_Phi() ||         // Found a PhiNode or ParmNode
          (n->is_Proj()  && n->in(0) == block->head()) ) {
        // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
        block->map_node(block->get_node(phi_cnt), i);
        block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
+       if (OptoRegScheduling && block_size_threshold_ok) {
+         // mark n as scheduled
+         n->add_flag(Node::Flag_is_scheduled);
+       }
      } else {                    // All others
        // Count block-local inputs to 'n'
        uint cnt = n->len();      // Input count
        uint local = 0;
        for( uint j=0; j<cnt; j++ ) {
*** 789,804 ****
    for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
      ready_cnt.at_put(block->get_node(i2)->_idx, 0);
  
    // All the prescheduled guys do not hold back internal nodes
    uint i3;
!   for(i3 = 0; i3<phi_cnt; i3++ ) {     // For all pre-scheduled
      Node *n = block->get_node(i3);       // Get pre-scheduled
      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* m = n->fast_out(j);
        if (get_block_for_node(m) == block) { // Local-block user
          int m_cnt = ready_cnt.at(m->_idx)-1;
          ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
        }
      }
    }
--- 947,968 ----
    for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
      ready_cnt.at_put(block->get_node(i2)->_idx, 0);
  
    // All the prescheduled guys do not hold back internal nodes
    uint i3;
!   for (i3 = 0; i3 < phi_cnt; i3++) {   // For all pre-scheduled
      Node *n = block->get_node(i3);       // Get pre-scheduled
      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* m = n->fast_out(j);
        if (get_block_for_node(m) == block) { // Local-block user
          int m_cnt = ready_cnt.at(m->_idx)-1;
+         if (OptoRegScheduling && block_size_threshold_ok) {
+           // mark m as scheduled
+           if (m_cnt < 0) {
+             m->add_flag(Node::Flag_is_scheduled);
+           }
+         }
          ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
        }
      }
    }
*** 825,834 ****
--- 989,1015 ----
    while (delay.size()) {
      Node* d = delay.pop();
      worklist.push(d);
    }
  
+   if (OptoRegScheduling && block_size_threshold_ok) {
+     // To stage register pressure calculations we need to examine the live set variables,
+     // breaking them up by register class to compartmentalize the calculations.
+     uint float_pressure = FLOATPRESSURE;
+ #ifdef _LP64
+     if (UseAVX > 2) {
+       float_pressure *= 2;
+     }
+ #endif
+     _regalloc->_sched_int_pressure.init(INTPRESSURE);
+     _regalloc->_sched_float_pressure.init(float_pressure);
+     _regalloc->_scratch_int_pressure.init(INTPRESSURE);
+     _regalloc->_scratch_float_pressure.init(float_pressure);
+ 
+     _regalloc->compute_entry_block_pressure(block);
+   }
+ 
    // Warm up the 'next_call' heuristic bits
    needed_for_next_call(block, block->head(), next_call);
  
  #ifndef PRODUCT
    if (trace_opto_pipelining()) {
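The doubling of the float-bank limit under UseAVX > 2 presumably reflects the wider architectural file (32 rather than 16 vector registers in 64-bit AVX-512 mode). A sketch of just that computation, with the flag values passed in as stand-in parameters rather than read from the real INTPRESSURE/FLOATPRESSURE/UseAVX globals:

    #include <cstdio>

    // float_pressure_flag, use_avx and lp64 stand in for the real globals.
    static unsigned float_bank_limit(unsigned float_pressure_flag,
                                     int use_avx, bool lp64) {
      unsigned limit = float_pressure_flag;
      if (lp64 && use_avx > 2) {
        limit *= 2;  // 64-bit AVX-512 adds xmm16-xmm31 to xmm0-xmm15
      }
      return limit;
    }

    int main() {
      printf("%u\n", float_bank_limit(14, 3, true));  // prints 28
      return 0;
    }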
*** 856,868 ****
        tty->cr();
      }
  #endif
  
      // Select and pop a ready guy from worklist
!     Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
      block->map_node(n, phi_cnt++);    // Schedule him next
  
  #ifndef PRODUCT
      if (trace_opto_pipelining()) {
        tty->print("#    select %d: %s", n->_idx, n->Name());
        tty->print(", latency:%d", get_latency_for_node(n));
        n->dump();
--- 1037,1058 ----
        tty->cr();
      }
  #endif
  
      // Select and pop a ready guy from worklist
!     Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt, recalc_pressure_nodes);
      block->map_node(n, phi_cnt++);    // Schedule him next
  
+     if (OptoRegScheduling && block_size_threshold_ok) {
+       n->add_flag(Node::Flag_is_scheduled);
+ 
+       // Now adjust the register pressure with the node we selected
+       if (!n->is_Phi()) {
+         adjust_register_pressure(n, block, recalc_pressure_nodes, true);
+       }
+     }
+ 
  #ifndef PRODUCT
      if (trace_opto_pipelining()) {
        tty->print("#    select %d: %s", n->_idx, n->Name());
        tty->print(", latency:%d", get_latency_for_node(n));
        n->dump();
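select() and schedule_local() split pressure accounting into a trial phase and a commit phase: each candidate is evaluated against the scratch trackers with its scheduled flag temporarily set, and only the winner's effect lands in the _sched_* trackers. A toy model of that protocol (every name here is a stand-in, not HotSpot's):

    #include <cstdio>

    struct Tracker { int pressure; };

    struct SchedState {
      Tracker committed;  // stands in for _sched_*_pressure
      Tracker scratch;    // stands in for _scratch_*_pressure
    };

    // finalize_mode == false: trial run against the scratch tracker (select)
    // finalize_mode == true : commit to the real tracker (schedule_local)
    static void adjust(SchedState& s, int delta, bool finalize_mode) {
      Tracker& t = finalize_mode ? s.committed : s.scratch;
      t.pressure += delta;
    }

    int main() {
      SchedState s = { {10}, {0} };
      s.scratch = s.committed;  // seed the what-if from the committed state
      adjust(s, +2, false);     // trial: score the candidate, commit nothing
      printf("trial=%d committed=%d\n", s.scratch.pressure, s.committed.pressure);
      adjust(s, +2, true);      // the candidate won: commit its effect
      printf("committed=%d\n", s.committed.pressure);
      return 0;                 // prints: trial=12 committed=10, committed=12
    }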
*** 904,914 ****
        if( m->is_Phi() ) continue;
        if (m->_idx >= max_idx) { // new node, skip it
          assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
          continue;
        }
!       int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);
        if( m_cnt == 0 )
          worklist.push(m);
      }
    }
--- 1094,1104 ----
        if( m->is_Phi() ) continue;
        if (m->_idx >= max_idx) { // new node, skip it
          assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
          continue;
        }
!       int m_cnt = ready_cnt.at(m->_idx) - 1;
        ready_cnt.at_put(m->_idx, m_cnt);
        if( m_cnt == 0 )
          worklist.push(m);
      }
    }
*** 923,945 ****
      }
  
      // assert( phi_cnt == end_idx(), "did not schedule all" );
      return false;
    }
  
  #ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("#");
      tty->print_cr("# after schedule_local");
      for (uint i = 0;i < block->number_of_nodes();i++) {
        tty->print("# ");
        block->get_node(i)->fast_dump();
      }
      tty->cr();
    }
  #endif
  
- 
    return true;
  }
  
  //--------------------------catch_cleanup_fix_all_inputs-----------------------
  static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
--- 1113,1147 ----
      }
  
      // assert( phi_cnt == end_idx(), "did not schedule all" );
      return false;
    }
  
+   if (OptoRegScheduling && block_size_threshold_ok) {
+     _regalloc->compute_exit_block_pressure(block);
+     block->_reg_pressure = _regalloc->_sched_int_pressure.final_pressure();
+     block->_freg_pressure = _regalloc->_sched_float_pressure.final_pressure();
+   }
+ 
  #ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("#");
      tty->print_cr("# after schedule_local");
      for (uint i = 0;i < block->number_of_nodes();i++) {
        tty->print("# ");
        block->get_node(i)->fast_dump();
      }
+     tty->print_cr("# ");
+ 
+     if (OptoRegScheduling && block_size_threshold_ok) {
+       tty->print_cr("# pressure info : %d", block->_pre_order);
+       _regalloc->print_pressure_info(_regalloc->_sched_int_pressure, "int register info");
+       _regalloc->print_pressure_info(_regalloc->_sched_float_pressure, "float register info");
+     }
      tty->cr();
    }
  #endif
  
    return true;
  }
  
  //--------------------------catch_cleanup_fix_all_inputs-----------------------
  static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {