uint LiveRangeMap::find_const(uint lrg) const {
  if (!lrg) {
    return lrg; // Ignore the zero LRG
  }

  // Off the end? This happens during debugging dumps when you have
  // brand-new live ranges but have not yet told the allocator.
  if (lrg >= _max_lrg_id) {
    return lrg;
  }

  uint next = _uf_map.at(lrg);
  while (next != lrg) { // Scan chain of equivalences
    assert(next < lrg, "always union smaller");
    lrg = next; // until we find a fixed point
    next = _uf_map.at(lrg);
  }
  return next;
}
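
// Example: with _uf_map = { 5 -> 3, 3 -> 3 }, find_const(5) follows the
// chain 5 -> 3, sees that 3 maps to itself (the fixed point), and returns
// 3. Being const, this variant walks the chain without compressing paths.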

PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher, bool scheduling_info_generated)
  : PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT
                  print_chaitin_statistics
#else
                  NULL
#endif
                  )
  , _lrg_map(Thread::current()->resource_area(), unique)
  , _live(0)
  , _spilled_once(Thread::current()->resource_area())
  , _spilled_twice(Thread::current()->resource_area())
  , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
  , _oldphi(unique)
  , _scheduling_info_generated(scheduling_info_generated)
  , _sched_int_pressure(0, INTPRESSURE)
  , _sched_float_pressure(0, FLOATPRESSURE)
  , _scratch_int_pressure(0, INTPRESSURE)
  , _scratch_float_pressure(0, FLOATPRESSURE)
#ifndef PRODUCT
  , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif
{
  Compile::TracePhase tp("ctorChaitin", &timers[_t_ctorChaitin]);

  _high_frequency_lrg = MIN2(double(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());

  // Build a list of basic blocks, sorted by frequency
  _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
  // Experiment with sorting strategies to speed compilation
  double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
  Block **buckets[NUMBUCKS];            // Array of buckets
  uint    buckcnt[NUMBUCKS];            // Array of bucket counters
  double  buckval[NUMBUCKS];            // Array of bucket value cutoffs
  for (uint i = 0; i < NUMBUCKS; i++) {
    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
    buckcnt[i] = 0;
    // Bump by three orders of magnitude each time
    cutoff *= 0.001;
  // registers) is the outgoing argument area; above that is the spill/temp
  // area. These are all "frame_slots". Arg_slots start at the zero
  // stack_slots and count up to the known arg_size. Frame_slots start at
  // the stack_slot #arg_size and go up. After allocation I map stack
  // slots to actual offsets. Stack-slots in the arg_slot area are biased
  // by the frame_size; stack-slots in the frame_slot area are biased by 0.

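  // A sketch of the stack-slot numbering described above (slots grow
  // upward; the spill/temp area sits above the outgoing arguments):
  //
  //   stack_slot arg_size and up  : spill/temp area ("frame_slots", bias 0)
  //   stack_slot 0 .. arg_size-1  : outgoing argument area ("arg_slots",
  //                                 biased by frame_size)
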
  _trip_cnt = 0;
  _alternate = 0;
  _matcher._allocation_started = true;

  ResourceArea split_arena; // Arena for Split local resources
  ResourceArea live_arena;  // Arena for liveness & IFG info
  ResourceMark rm(&live_arena);

  // Need liveness for the IFG; need the IFG for coalescing. If the
  // liveness is JUST for coalescing, then I can get some mileage by renaming
  // all copy-related live ranges low and then using the max copy-related
  // live range as a cut-off for LIVE and the IFG. In other words, I can
  // build a subset of LIVE and IFG just for copies.
  PhaseLive live(_cfg, _lrg_map.names(), &live_arena, false);

  // Need IFG for coalescing and coloring
  PhaseIFG ifg(&live_arena);
  _ifg = &ifg;

  // Come out of SSA world to the Named world. Assign (virtual) registers to
  // Nodes. Use the same register for all inputs and the output of PhiNodes
  // - effectively ending SSA form. This requires either coalescing live
  // ranges or inserting copies. For the moment, we insert "virtual copies"
  // - we pretend there is a copy prior to each Phi in predecessor blocks.
  // We will attempt to coalesce such "virtual copies" before we manifest
  // them for real.
  de_ssa();

#ifdef ASSERT
  // Verify the graph before RA.
  verify(&live_arena);
#endif

  {
  // get allocated, but instead rely on correct scheduling to ensure that
  // only one instance is simultaneously live at a time.
  uint lr_counter = 1;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    uint cnt = block->number_of_nodes();

    // Handle all the normal Nodes in the block
    for (uint j = 0; j < cnt; j++) {
      Node *n = block->get_node(j);
      // Pre-color to the zero live range, or pick virtual register
      const RegMask &rm = n->out_RegMask();
      _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
    }
  }

  // Reset the Union-Find mapping to be identity
  _lrg_map.reset_uf_map(lr_counter);
}
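
// After de_ssa(), every node is mapped to a live range id and the
// union-find map is the identity: each live range starts as its own
// equivalence class. Coalescing later unions classes together, and
// find()/find_const() recover the representative element.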
void PhaseChaitin::mark_ssa() {
  // Use SSA names to populate the live range maps, or the 0 entry if no
  // mask is available.
  uint max_idx = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    uint cnt = block->number_of_nodes();

    // Handle all the normal Nodes in the block
    for (uint j = 0; j < cnt; j++) {
      Node *n = block->get_node(j);
      // Pre-color to the zero live range, or pick virtual register
      const RegMask &rm = n->out_RegMask();
      _lrg_map.map(n->_idx, rm.is_NotEmpty() ? n->_idx : 0);
      max_idx = (n->_idx > max_idx) ? n->_idx : max_idx;
    }
  }
  _lrg_map.set_max_lrg_id(max_idx + 1);

  // Reset the Union-Find mapping to be identity
  _lrg_map.reset_uf_map(max_idx + 1);
}
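
// Note the contrast with de_ssa() above: mark_ssa() preserves the SSA
// numbering by mapping each node to its own _idx rather than to a compact
// counter, so live range ids are sparse and max_lrg_id must cover the
// largest node index.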


// Gather LiveRanGe information, including register masks. Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
void PhaseChaitin::gather_lrg_masks(bool after_aggressive) {

  // Nail down the frame pointer live range
  uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
  lrgs(fp_lrg)._cost += 1e12; // Cost is effectively infinite

  // For all blocks
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // For all instructions
    for (uint j = 1; j < block->number_of_nodes(); j++) {
      Node* n = block->get_node(j);
      uint input_edge_start = 1; // Skip the control edge for most nodes
      bool is_machine_node = false;
      if (n->is_Mach()) {
        is_machine_node = true;
        input_edge_start = n->as_Mach()->oper_input_base();
      }
744
745 // Get virtual register number, same as LiveRanGe index
746 uint vreg = _lrg_map.live_range_id(n);
747 LRG& lrg = lrgs(vreg);
748 if (vreg) { // No vreg means un-allocable (e.g. memory)
749
750 // Collect has-copy bit
751 if (idx) {
752 lrg._has_copy = 1;
753 uint clidx = _lrg_map.live_range_id(n->in(idx));
754 LRG& copy_src = lrgs(clidx);
755 copy_src._has_copy = 1;
756 }
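
        // (is_Copy() returns the index of the copied input, or 0 when the
        // node is not a copy, so idx above serves as both a flag and an
        // edge number for n->in(idx).)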

        // Check for float-vs-int live range (used in register-pressure
        // calculations)
        const Type *n_type = n->bottom_type();
          assert(RegMask::num_registers(Op_VecZ) == RegMask::SlotsPerVecZ, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecZ), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecZ);
          lrg.set_reg_pressure(1);
          break;
        default:
          ShouldNotReachHere();
        }
      }

      // Now do the same for inputs
      uint cnt = n->req();
      // Setup for CISC SPILLING
      uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
      if (UseCISCSpill && after_aggressive) {
        inp = n->cisc_operand();
        if (inp != (uint)AdlcVMDeps::Not_cisc_spillable) {
          // Convert operand number to edge index number
          inp = n->as_Mach()->operand_index(inp);
        }
      }
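
      // (CISC spilling, a sketch: on machines with memory operands, an
      // instruction can often consume a spilled value directly from its
      // stack slot rather than demanding a register; use_cisc_RegMask()
      // below widens the chosen input's mask accordingly. 'inp' names the
      // single input edge eligible for this treatment.)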

      // Prepare register mask for each input
      for (uint k = input_edge_start; k < cnt; k++) {
        uint vreg = _lrg_map.live_range_id(n->in(k));
        if (!vreg) {
          continue;
        }

        // If this instruction is CISC Spillable, add the flags
        // bit to its appropriate input
        if (UseCISCSpill && after_aggressive && inp == k) {
#ifndef PRODUCT
          if (TraceCISCSpill) {
            tty->print("  use_cisc_RegMask: ");
            n->dump();
          }
#endif
          n->as_Mach()->use_cisc_RegMask();
        }

        if (is_machine_node && _scheduling_info_generated) {
          MachNode* cur_node = n->as_Mach();
          // Skip inputs beyond the node's declared operands; these are
          // cleaned up by register allocation.
          if (k >= cur_node->num_opnds()) continue;
        }

        LRG &lrg = lrgs(vreg);
        // // Testing for floating point code shape
        // Node *test = n->in(k);
        // if( test->is_Mach() ) {
        //   MachNode *m = test->as_Mach();
        //   int op = m->ideal_Opcode();
        //   if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
        //     int zzz = 1;
        //   }
        // }

        // Limit result register mask to acceptable registers.
        // Do not limit registers from uncommon uses before
        // AggressiveCoalesce. This effectively pre-virtual-splits
        // around uncommon uses of common defs.
        const RegMask &rm = n->in_RegMask(k);
        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
          // Since we are BEFORE aggressive coalesce, leave the register
          // mask untrimmed by the call. This encourages more coalescing.
          // Later, AFTER aggressive, this live range will have to spill.
        } else {
          lrg.AND(rm);
        }
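
        // lrg.AND(rm) intersects the live range's accumulated mask with
        // this use's mask; once every def and use has been processed, the
        // live range is confined to registers acceptable everywhere it
        // appears.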

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        int kreg = n->in(k)->ideal_reg();
        bool is_vect = RegMask::is_vector(kreg);
        assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
               is_vect || kreg == Op_RegD || kreg == Op_RegL,
               "vector must be in vector registers");
        if (lrgmask.is_bound(kreg)) {
          lrg._is_bound = 1;
        }

        // If this use of a double forces a mis-aligned double, flag the
        // live range as '_fat_proj' - really, flag it as allowing
        // misalignment AND change how we count interferences. A
        // mis-aligned double can interfere with TWO aligned pairs, or
        // effectively FOUR registers!
#ifdef ASSERT
        if (is_vect && !_scheduling_info_generated) {
          assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
          assert(!lrg._fat_proj, "sanity");
          assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
        }
#endif
        if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
        }
        // If the LRG is an unaligned pair, we will have to spill, so clear
        // the LRG's register mask if it is not already spilled
        if (!is_vect && !n->is_SpillCopy() &&
            (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
            lrgmask.is_misaligned_pair()) {
          lrg.Clear();
        }

        // Check for maximum frequency value
        if (lrg._maxfreq < block->_freq) {
          lrg._maxfreq = block->_freq;