src/share/vm/opto/chaitin.cpp

 192 }
 193 
 194 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
 195   : PhaseRegAlloc(unique, cfg, matcher,
 196 #ifndef PRODUCT
 197        print_chaitin_statistics
 198 #else
 199        NULL
 200 #endif
 201        )
 202   , _lrg_map(Thread::current()->resource_area(), unique)
 203   , _live(0)
 204   , _spilled_once(Thread::current()->resource_area())
 205   , _spilled_twice(Thread::current()->resource_area())
 206   , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
 207   , _oldphi(unique)
 208 #ifndef PRODUCT
 209   , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
 210 #endif
 211 {
 212   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
 213 
 214   _high_frequency_lrg = MIN2(double(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 215 
 216   // Build a list of basic blocks, sorted by frequency
 217   _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
 218   // Experiment with sorting strategies to speed compilation
 219   double  cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
 220   Block **buckets[NUMBUCKS];             // Array of buckets
 221   uint    buckcnt[NUMBUCKS];             // Array of bucket counters
 222   double  buckval[NUMBUCKS];             // Array of bucket value cutoffs
 223   for (uint i = 0; i < NUMBUCKS; i++) {
 224     buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
 225     buckcnt[i] = 0;
 226     // Drop the cutoff by three orders of magnitude each time
 227     cutoff *= 0.001;
 228     buckval[i] = cutoff;
 229     for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
 230       buckets[i][j] = NULL;
 231     }
 232   }
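
A minimal sketch (not this file's code) of how a block is then binned with the
buckval[] cutoffs built above: take the first bucket whose cutoff the block's
frequency exceeds, and let the last bucket catch everything colder.

    // Sketch only; NUMBUCKS and buckval[] mirror the locals in this ctor.
    static uint pick_bucket(double freq, const double buckval[], uint numbucks) {
      for (uint i = 0; i < numbucks - 1; i++) {
        if (freq > buckval[i]) {     // first cutoff this block clears
          return i;
        }
      }
      return numbucks - 1;           // coldest blocks land in the last bucket
    }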


 278   uint cnt = orig->outcnt();
 279   for (uint i = 0; i < cnt; i++) {
 280     Node* proj = orig->raw_out(i);
 281     if (proj->is_MachProj()) {
 282       assert(proj->outcnt() == 0, "only kill projections are expected here");
 283       assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
 284       found_projs++;
 285       // Copy kill projections after the cloned node
 286       Node* kills = proj->clone();
 287       kills->set_req(0, copy);
 288       b->insert_node(kills, idx++);
 289       _cfg.map_node_to_block(kills, b);
 290       new_lrg(kills, max_lrg_id++);
 291     }
 292   }
 293   return found_projs;
 294 }
 295 
 296 // Renumber the live ranges to compact them.  Makes the IFG smaller.
 297 void PhaseChaitin::compact() {


 298   // Currently the _uf_map contains a series of short chains which are headed
 299   // by a self-cycle.  All the chains run from big numbers to little numbers.
 300   // The Find() call chases the chains & shortens them for the next Find call.
 301   // We are going to change this structure slightly.  Numbers above a moving
 302   // wave 'i' are unchanged.  Numbers below 'j' point directly to their
 303   // compacted live range with no further chaining.  There are no chains or
 304   // cycles below 'i', so the Find call no longer works.
 305   uint j=1;
 306   uint i;
 307   for (i = 1; i < _lrg_map.max_lrg_id(); i++) {
 308     uint lr = _lrg_map.uf_live_range_id(i);
 309     // Ignore unallocated live ranges
 310     if (!lr) {
 311       continue;
 312     }
 313     assert(lr <= i, "");
 314     _lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));
 315   }
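
A worked example of the compaction (illustrative numbers only): five live
ranges where 2 was unioned into 1 and 4 into 3, so 1, 3 and 5 head self-cycles.

    before:  uf[1]=1  uf[2]=1  uf[3]=3  uf[4]=3  uf[5]=5
    after :  uf[1]=1  uf[2]=1  uf[3]=2  uf[4]=2  uf[5]=3    (j ends at 4)

Heads are renumbered densely as the wave passes, and each member copies its
head's already-compacted name in one step, so no chains survive below 'i'.
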
 316   // Now change the Node->LR mapping to reflect the compacted names
 317   uint unique = _lrg_map.size();


 352 
 353   // Need IFG for coalescing and coloring
 354   PhaseIFG ifg(&live_arena);
 355   _ifg = &ifg;
 356 
 357   // Come out of SSA world to the Named world.  Assign (virtual) registers to
 358   // Nodes.  Use the same register for all inputs and the output of PhiNodes
 359   // - effectively ending SSA form.  This requires either coalescing live
 360   // ranges or inserting copies.  For the moment, we insert "virtual copies"
 361   // - we pretend there is a copy prior to each Phi in predecessor blocks.
 362   // We will attempt to coalesce such "virtual copies" before we manifest
 363   // them for real.
 364   de_ssa();
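
For intuition, a hypothetical IR fragment (not from this file) showing the
virtual copies a two-input Phi picks up in its predecessor blocks:

    B1: x1 = ...             B1: x1 = ...;  v = x1    // virtual copy
    B2: x2 = ...      ==>    B2: x2 = ...;  v = x2    // virtual copy
    B3: x3 = Phi(x1, x2)     B3: x3 = v               // same LRG as the Phi

If coalescing succeeds, v, x1, x2 and x3 end up in one live range and the
copies are never manifested.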
 365 
 366 #ifdef ASSERT
 367   // Verify the graph before RA.
 368   verify(&live_arena);
 369 #endif
 370 
 371   {
 372     NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
 373     _live = NULL;                 // Mark live as being not available
 374     rm.reset_to_mark();           // Reclaim working storage
 375     IndexSet::reset_memory(C, &live_arena);
 376     ifg.init(_lrg_map.max_lrg_id()); // Empty IFG
 377     gather_lrg_masks( false );    // Collect LRG masks
 378     live.compute(_lrg_map.max_lrg_id()); // Compute liveness
 379     _live = &live;                // Mark LIVE as being available
 380   }
 381 
 382   // Base pointers are currently "used" by instructions which define new
 383   // derived pointers.  This makes base pointers live up to where the
 384   // derived pointer is made, but not beyond.  Really, they need to be live
 385   // across any GC point where the derived value is live.  So this code looks
 386   // at all the GC points, and "stretches" the live range of any base pointer
 387   // to the GC point.
 388   if (stretch_base_pointer_live_ranges(&live_arena)) {
 389     NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)
 390     // Since some live range stretched, I need to recompute live
 391     _live = NULL;
 392     rm.reset_to_mark();         // Reclaim working storage
 393     IndexSet::reset_memory(C, &live_arena);
 394     ifg.init(_lrg_map.max_lrg_id());
 395     gather_lrg_masks(false);
 396     live.compute(_lrg_map.max_lrg_id());
 397     _live = &live;
 398   }
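
Concretely (hypothetical node shapes, not from this file): the stretching
keeps the base alive so a moving collector can relocate the object and rebase
the derived pointer at the safepoint.

    base    = Allocate ...
    derived = AddP(base, #16)     // without stretching, base dies here
    SafePoint(... derived ...)    // GC may move the object here, so the
                                  // base live range is stretched to cover it
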
 399   // Create the interference graph using virtual copies
 400   build_ifg_virtual();  // Include stack slots this time
 401 
 402   // Aggressive (but pessimistic) copy coalescing.
 403   // This pass works on virtual copies.  Any virtual copies which are not
 404   // coalesced get manifested as actual copies
 405   {


 406     // The IFG is/was triangular.  I am 'squaring it up' so Union can run
 407     // faster.  Union requires a 'for all' operation which is slow on the
 408     // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
 409     // meaning I can visit all of a Node's neighbors less than that Node in time
 410     // O(# of neighbors), but I have to visit all the Nodes greater than a
 411     // given Node and search them for an instance, i.e., time O(#MaxLRG)).
 412     _ifg->SquareUp();
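
A minimal model of the squaring (a std::set stand-in, not HotSpot's IndexSet):
mirror every lower-triangular edge so that neighbor queries cost O(degree) in
both directions.

    #include <set>
    #include <vector>

    // adj[a] initially holds only the neighbors b < a (triangular form).
    void square_up(std::vector< std::set<unsigned> >& adj) {
      for (unsigned a = 1; a < adj.size(); a++) {
        for (std::set<unsigned>::const_iterator it = adj[a].begin();
             it != adj[a].end(); ++it) {
          adj[*it].insert(a);      // mirror edge (a, *it) into *it's list
        }
      }
    }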
 413 
 414     PhaseAggressiveCoalesce coalesce(*this);
 415     coalesce.coalesce_driver();
 416     // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
 417     // not match the Phi itself, insert a copy.
 418     coalesce.insert_copies(_matcher);
 419     if (C->failing()) {
 420       return;
 421     }
 422   }
 423 
 424   // After aggressive coalesce, attempt a first cut at coloring.
 425   // To color, we need the IFG and for that we need LIVE.
 426   {
 427     NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
 428     _live = NULL;
 429     rm.reset_to_mark();           // Reclaim working storage
 430     IndexSet::reset_memory(C, &live_arena);
 431     ifg.init(_lrg_map.max_lrg_id());
 432     gather_lrg_masks( true );
 433     live.compute(_lrg_map.max_lrg_id());
 434     _live = &live;
 435   }
 436 
 437   // Build physical interference graph
 438   uint must_spill = 0;
 439   must_spill = build_ifg_physical(&live_arena);
 440   // If we have a guaranteed spill, might as well spill now
 441   if (must_spill) {
 442     if(!_lrg_map.max_lrg_id()) {
 443       return;
 444     }
 445     // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
 446     C->check_node_count(10*must_spill, "out of nodes before split");
 447     if (C->failing()) {
 448       return;
 449     }
 450 
 451     uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
 452     _lrg_map.set_max_lrg_id(new_max_lrg_id);
 453     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
 454     // or we failed to split
 455     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
 456     if (C->failing()) {
 457       return;
 458     }
 459 
 460     NOT_PRODUCT(C->verify_graph_edges();)
 461 
 462     compact();                  // Compact LRGs; return new lower max lrg
 463 
 464     {
 465       NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
 466       _live = NULL;
 467       rm.reset_to_mark();         // Reclaim working storage
 468       IndexSet::reset_memory(C, &live_arena);
 469       ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph
 470       gather_lrg_masks( true );   // Collect intersect mask
 471       live.compute(_lrg_map.max_lrg_id()); // Compute LIVE
 472       _live = &live;
 473     }
 474     build_ifg_physical(&live_arena);
 475     _ifg->SquareUp();
 476     _ifg->Compute_Effective_Degree();
 477     // Only do conservative coalescing if requested
 478     if (OptoCoalesce) {

 479       // Conservative (and pessimistic) copy coalescing of those spills
 480       PhaseConservativeCoalesce coalesce(*this);
 481       // If the number of live ranges exceeds the cutoff, don't color the stack.
 482       // This cutoff can be larger than the one below since it is only done once.
 483       coalesce.coalesce_driver();
 484     }
 485     _lrg_map.compress_uf_map_for_nodes();
 486 
 487 #ifdef ASSERT
 488     verify(&live_arena, true);
 489 #endif
 490   } else {
 491     ifg.SquareUp();
 492     ifg.Compute_Effective_Degree();
 493 #ifdef ASSERT
 494     set_was_low();
 495 #endif
 496   }
 497 
 498   // Prepare for Simplify & Select


 514         C->record_method_not_compilable("failed spill-split-recycle sanity check");
 515         return;
 516       }
 517     }
 518 
 519     if (!_lrg_map.max_lrg_id()) {
 520       return;
 521     }
 522     uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
 523     _lrg_map.set_max_lrg_id(new_max_lrg_id);
 524     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
 525     C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");
 526     if (C->failing()) {
 527       return;
 528     }
 529 
 530     compact(); // Compact LRGs; return new lower max lrg
 531 
 532     // Nuke the liveness and interference graph and LiveRanGe info
 533     {
 534       NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
 535       _live = NULL;
 536       rm.reset_to_mark();         // Reclaim working storage
 537       IndexSet::reset_memory(C, &live_arena);
 538       ifg.init(_lrg_map.max_lrg_id());
 539 
 540       // Create LiveRanGe array.
 541       // Intersect register masks for all USEs and DEFs
 542       gather_lrg_masks(true);
 543       live.compute(_lrg_map.max_lrg_id());
 544       _live = &live;
 545     }
 546     must_spill = build_ifg_physical(&live_arena);
 547     _ifg->SquareUp();
 548     _ifg->Compute_Effective_Degree();
 549 
 550     // Only do conservative coalescing if requested
 551     if (OptoCoalesce) {

 552       // Conservative (and pessimistic) copy coalescing
 553       PhaseConservativeCoalesce coalesce(*this);
 554       // Checking for few live ranges determines how aggressive the coalescing is.
 555       coalesce.coalesce_driver();
 556     }
 557     _lrg_map.compress_uf_map_for_nodes();
 558 #ifdef ASSERT
 559     verify(&live_arena, true);
 560 #endif
 561     cache_lrg_info();           // Count degree of LRGs
 562 
 563     // Simplify the InterFerence Graph by removing LRGs of low degree.
 564     // LRGs of low degree are trivially colorable.
 565     Simplify();
 566 
 567     // Select colors by re-inserting LRGs back into the IFG in reverse order.
 568     // Return whether or not something spills.
 569     spills = Select();
 570   }
 571 


1037       int briggs_degree = 0;
1038       IndexSet *s = _ifg->neighbors(i);
1039       IndexSetIterator elements(s);
1040       uint lidx;
1041       while((lidx = elements.next()) != 0) {
1042         if( !lrgs(lidx).lo_degree() )
1043           briggs_degree += MAX2(size,lrgs(lidx).num_regs());
1044       }
1045       if( briggs_degree < lrgs(i).degrees_of_freedom() )
1046         lrgs(i)._was_lo = 1;    // Low degree via the briggs assertion
1047     }
1048     assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
1049   }
1050 #endif
1051 }
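
A worked instance of the Briggs assertion above (illustrative numbers,
assuming a single-register LRG): with 8 degrees of freedom and only three
non-lo-degree neighbors, the LRG is trivially colorable no matter what its
raw degree says.

    degrees_of_freedom = 8            // colors this LRG could take
    briggs_degree      = 1 + 1 + 1    // MAX2(size, num_regs) per hi neighbor
    3 < 8   =>   _was_lo = 1          // a color is guaranteed to remain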
1052 
1053 #define REGISTER_CONSTRAINED 16
1054 
1055 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
1056 void PhaseChaitin::cache_lrg_info( ) {

1057 
1058   for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
1059     LRG &lrg = lrgs(i);
1060 
1061     // Check for being of low degree: that means we can be trivially colored.
1062     // Low degree, dead or must-spill guys just get to simplify right away
1063     if( lrg.lo_degree() ||
1064        !lrg.alive() ||
1065         lrg._must_spill ) {
1066       // Split low degree list into those guys that must get a
1067       // register and those that can go to register or stack.
1068       // The idea is LRGs that can go register or stack color first when
1069       // they have a good chance of getting a register.  The register-only
1070       // lo-degree live ranges always get a register.
1071       OptoReg::Name hi_reg = lrg.mask().find_last_elem();
1072       if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
1073         lrg._next = _lo_stk_degree;
1074         _lo_stk_degree = i;
1075       } else {
1076         lrg._next = _lo_degree;


1120     uint neighbor;
1121     while ((neighbor = elements.next()) != 0) {
1122       LRG *n = &lrgs(neighbor);
1123       assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
1124 
1125       // Check for just becoming low-degree
1126       if( n->just_lo_degree() && !n->_has_copy ) {
1127         assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
1128         // Put on lo-degree list
1129         n->_next = lo_no_copy;
1130         lo_no_copy = neighbor;
1131       }
1132     }
1133   } // End of while lo-degree no_copy worklist not empty
1134 
1135   // No more lo-degree no-copy live ranges to simplify
1136 }
1137 
1138 // Simplify the IFG by removing LRGs of low degree.
1139 void PhaseChaitin::Simplify( ) {

1140 
1141   while( 1 ) {                  // Repeat till all is simplified
1142     // May want to explore simplifying lo_degree before _lo_stk_degree.
1143     // This might result in more spills coloring into registers during
1144     // Select().
1145     while( _lo_degree || _lo_stk_degree ) {
1146       // Pull register-only (_lo_degree) first; reg-or-stack guys then color first in Select()
1147       uint lo;
1148       if( _lo_degree ) {
1149         lo = _lo_degree;
1150         _lo_degree = lrgs(lo)._next;
1151       } else {
1152         lo = _lo_stk_degree;
1153         _lo_stk_degree = lrgs(lo)._next;
1154       }
1155 
1156       // Put the simplified guy on the simplified list.
1157       lrgs(lo)._next = _simplified;
1158       _simplified = lo;
1159       // If this guy is "at risk" then mark his current neighbors


1367       !lrg._fat_proj )          // Aligned+adjacent pairs ok
1368     // Use a heuristic to "bias" the color choice
1369     return bias_color(lrg, chunk);
1370 
1371   assert(!lrg._is_vector, "should not be a vector here" );
1372   assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
1373 
1374   // Fat-proj case or misaligned double argument.
1375   assert(lrg.compute_mask_size() == lrg.num_regs() ||
1376          lrg.num_regs() == 2,"fat projs exactly color" );
1377   assert( !chunk, "always color in 1st chunk" );
1378   // Return the highest element in the set.
1379   return lrg.mask().find_last_elem();
1380 }
1381 
1382 // Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
1383 // in reverse order of removal.  As long as nothing of hi-degree was yanked,
1384 // everything going back is guaranteed a color.  Select that color.  If some
1385 // hi-degree LRG cannot get a color then we record that we must spill.
1386 uint PhaseChaitin::Select( ) {


1387   uint spill_reg = LRG::SPILL_REG;
1388   _max_reg = OptoReg::Name(0);  // Past max register used
1389   while( _simplified ) {
1390     // Pull next LRG from the simplified list - in reverse order of removal
1391     uint lidx = _simplified;
1392     LRG *lrg = &lrgs(lidx);
1393     _simplified = lrg->_next;
1394 
1395 
1396 #ifndef PRODUCT
1397     if (trace_spilling()) {
1398       ttyLocker ttyl;
1399       tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
1400                     lrg->degrees_of_freedom());
1401       lrg->dump();
1402     }
1403 #endif
1404 
1405     // Re-insert into the IFG
1406     _ifg->re_insert(lidx);
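
Why reverse order of removal guarantees a color (a worked count, assuming
K = 3 registers): an LRG removed at lo-degree re-inserts among fewer than K
already-colored neighbors, so at least one register is always left.

    L removed at degree 2:    neighbors hold {r0, r2}        ->  r1 is free
    M yanked at hi-degree 4:  neighbors may hold {r0, r1, r2} -> no color;
                              M gets SPILL_REG and we record that we must spill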


1560     lrgs(_lrg_map.find(dst))._was_spilled1 = 1;
1561     if( _spilled_twice.test(src->_idx) ) {
1562       _spilled_twice.set(dst->_idx);
1563       lrgs(_lrg_map.find(dst))._was_spilled2 = 1;
1564     }
1565   }
1566 }
1567 
1568 // Set the 'spilled_once' or 'spilled_twice' flag on a node.
1569 void PhaseChaitin::set_was_spilled( Node *n ) {
1570   if( _spilled_once.test_set(n->_idx) )
1571     _spilled_twice.set(n->_idx);
1572 }
1573 
1574 // Convert Ideal spill instructions into proper FramePtr + offset Loads and
1575 // Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
1576 void PhaseChaitin::fixup_spills() {
1577   // This function does only cisc spill work.
1578   if( !UseCISCSpill ) return;
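
For intuition, an x86-flavored illustration (not from this file) of what CISC
spilling buys: the stack slot is folded straight into the using instruction
instead of being reloaded into a scratch register first.

    // plain reload:                   // cisc-spilled:
    movl  ecx, [esp + #16]             addl  eax, [esp + #16]
    addl  eax, ecx

cisc_operand(), queried below, reports which input of an instruction may be
folded this way.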
1579 
1580   NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
1581 
1582   // Grab the Frame Pointer
1583   Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
1584 
1585   // For all blocks
1586   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
1587     Block* block = _cfg.get_block(i);
1588 
1589     // For all instructions in block
1590     uint last_inst = block->end_idx();
1591     for (uint j = 1; j <= last_inst; j++) {
1592       Node* n = block->get_node(j);
1593 
1594       // Dead instruction???
1595       assert( n->outcnt() != 0 ||// Nothing dead after post alloc
1596               C->top() == n ||  // Or the random TOP node
1597               n->is_Proj(),     // Or a fat-proj kill node
1598               "No dead instructions after post-alloc" );
1599 
1600       int inp = n->cisc_operand();




 192 }
 193 
 194 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
 195   : PhaseRegAlloc(unique, cfg, matcher,
 196 #ifndef PRODUCT
 197        print_chaitin_statistics
 198 #else
 199        NULL
 200 #endif
 201        )
 202   , _lrg_map(Thread::current()->resource_area(), unique)
 203   , _live(0)
 204   , _spilled_once(Thread::current()->resource_area())
 205   , _spilled_twice(Thread::current()->resource_area())
 206   , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
 207   , _oldphi(unique)
 208 #ifndef PRODUCT
 209   , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
 210 #endif
 211 {
 212   Compile::TracePhase t3("ctorChaitin", &timers[_t_ctorChaitin]);
 213 
 214   _high_frequency_lrg = MIN2(double(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 215 
 216   // Build a list of basic blocks, sorted by frequency
 217   _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
 218   // Experiment with sorting strategies to speed compilation
 219   double  cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
 220   Block **buckets[NUMBUCKS];             // Array of buckets
 221   uint    buckcnt[NUMBUCKS];             // Array of bucket counters
 222   double  buckval[NUMBUCKS];             // Array of bucket value cutoffs
 223   for (uint i = 0; i < NUMBUCKS; i++) {
 224     buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
 225     buckcnt[i] = 0;
 226     // Drop the cutoff by three orders of magnitude each time
 227     cutoff *= 0.001;
 228     buckval[i] = cutoff;
 229     for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
 230       buckets[i][j] = NULL;
 231     }
 232   }


 278   uint cnt = orig->outcnt();
 279   for (uint i = 0; i < cnt; i++) {
 280     Node* proj = orig->raw_out(i);
 281     if (proj->is_MachProj()) {
 282       assert(proj->outcnt() == 0, "only kill projections are expected here");
 283       assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
 284       found_projs++;
 285       // Copy kill projections after the cloned node
 286       Node* kills = proj->clone();
 287       kills->set_req(0, copy);
 288       b->insert_node(kills, idx++);
 289       _cfg.map_node_to_block(kills, b);
 290       new_lrg(kills, max_lrg_id++);
 291     }
 292   }
 293   return found_projs;
 294 }
 295 
 296 // Renumber the live ranges to compact them.  Makes the IFG smaller.
 297 void PhaseChaitin::compact() {
 298   Compile::TracePhase t3("chaitinCompact", &timers[_t_chaitinCompact]);
 299 
 300   // Currently the _uf_map contains a series of short chains which are headed
 301   // by a self-cycle.  All the chains run from big numbers to little numbers.
 302   // The Find() call chases the chains & shortens them for the next Find call.
 303   // We are going to change this structure slightly.  Numbers above a moving
 304   // wave 'i' are unchanged.  Numbers below 'j' point directly to their
 305   // compacted live range with no further chaining.  There are no chains or
 306   // cycles below 'i', so the Find call no longer works.
 307   uint j=1;
 308   uint i;
 309   for (i = 1; i < _lrg_map.max_lrg_id(); i++) {
 310     uint lr = _lrg_map.uf_live_range_id(i);
 311     // Ignore unallocated live ranges
 312     if (!lr) {
 313       continue;
 314     }
 315     assert(lr <= i, "");
 316     _lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));
 317   }
 318   // Now change the Node->LR mapping to reflect the compacted names
 319   uint unique = _lrg_map.size();


 354 
 355   // Need IFG for coalescing and coloring
 356   PhaseIFG ifg(&live_arena);
 357   _ifg = &ifg;
 358 
 359   // Come out of SSA world to the Named world.  Assign (virtual) registers to
 360   // Nodes.  Use the same register for all inputs and the output of PhiNodes
 361   // - effectively ending SSA form.  This requires either coalescing live
 362   // ranges or inserting copies.  For the moment, we insert "virtual copies"
 363   // - we pretend there is a copy prior to each Phi in predecessor blocks.
 364   // We will attempt to coalesce such "virtual copies" before we manifest
 365   // them for real.
 366   de_ssa();
 367 
 368 #ifdef ASSERT
 369   // Verify the graph before RA.
 370   verify(&live_arena);
 371 #endif
 372 
 373   {
 374     Compile::TracePhase t3("computeLive", &timers[_t_computeLive]);
 375     _live = NULL;                 // Mark live as being not available
 376     rm.reset_to_mark();           // Reclaim working storage
 377     IndexSet::reset_memory(C, &live_arena);
 378     ifg.init(_lrg_map.max_lrg_id()); // Empty IFG
 379     gather_lrg_masks( false );    // Collect LRG masks
 380     live.compute(_lrg_map.max_lrg_id()); // Compute liveness
 381     _live = &live;                // Mark LIVE as being available
 382   }
 383 
 384   // Base pointers are currently "used" by instructions which define new
 385   // derived pointers.  This makes base pointers live up to where the
 386   // derived pointer is made, but not beyond.  Really, they need to be live
 387   // across any GC point where the derived value is live.  So this code looks
 388   // at all the GC points, and "stretches" the live range of any base pointer
 389   // to the GC point.
 390   if (stretch_base_pointer_live_ranges(&live_arena)) {
 391     Compile::TracePhase t3("computeLive (sbplr)", &timers[_t_computeLive]);
 392     // Since some live range stretched, I need to recompute live
 393     _live = NULL;
 394     rm.reset_to_mark();         // Reclaim working storage
 395     IndexSet::reset_memory(C, &live_arena);
 396     ifg.init(_lrg_map.max_lrg_id());
 397     gather_lrg_masks(false);
 398     live.compute(_lrg_map.max_lrg_id());
 399     _live = &live;
 400   }
 401   // Create the interference graph using virtual copies
 402   build_ifg_virtual();  // Include stack slots this time
 403 
 404   // Aggressive (but pessimistic) copy coalescing.
 405   // This pass works on virtual copies.  Any virtual copies which are not
 406   // coalesced get manifested as actual copies
 407   {
 408     Compile::TracePhase t3("chaitinCoalesce", &timers[_t_chaitinCoalesce]);
 409
 410     // The IFG is/was triangular.  I am 'squaring it up' so Union can run
 411     // faster.  Union requires a 'for all' operation which is slow on the
 412     // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
 413     // meaning I can visit all of a Node's neighbors less than that Node in time
 414     // O(# of neighbors), but I have to visit all the Nodes greater than a
 415     // given Node and search them for an instance, i.e., time O(#MaxLRG)).
 416     _ifg->SquareUp();
 417 
 418     PhaseAggressiveCoalesce coalesce(*this);
 419     coalesce.coalesce_driver();
 420     // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
 421     // not match the Phi itself, insert a copy.
 422     coalesce.insert_copies(_matcher);
 423     if (C->failing()) {
 424       return;
 425     }
 426   }
 427 
 428   // After aggressive coalesce, attempt a first cut at coloring.
 429   // To color, we need the IFG and for that we need LIVE.
 430   {
 431     Compile::TracePhase t3("computeLive", &timers[_t_computeLive]);
 432     _live = NULL;
 433     rm.reset_to_mark();           // Reclaim working storage
 434     IndexSet::reset_memory(C, &live_arena);
 435     ifg.init(_lrg_map.max_lrg_id());
 436     gather_lrg_masks( true );
 437     live.compute(_lrg_map.max_lrg_id());
 438     _live = &live;
 439   }
 440 
 441   // Build physical interference graph
 442   uint must_spill = 0;
 443   must_spill = build_ifg_physical(&live_arena);
 444   // If we have a guaranteed spill, might as well spill now
 445   if (must_spill) {
 446     if(!_lrg_map.max_lrg_id()) {
 447       return;
 448     }
 449     // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
 450     C->check_node_count(10*must_spill, "out of nodes before split");
 451     if (C->failing()) {
 452       return;
 453     }
 454 
 455     uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
 456     _lrg_map.set_max_lrg_id(new_max_lrg_id);
 457     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
 458     // or we failed to split
 459     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
 460     if (C->failing()) {
 461       return;
 462     }
 463 
 464     NOT_PRODUCT(C->verify_graph_edges();)
 465 
 466     compact();                  // Compact LRGs; return new lower max lrg
 467 
 468     {
 469       Compile::TracePhase t3("computeLive", &timers[_t_computeLive]);
 470       _live = NULL;
 471       rm.reset_to_mark();         // Reclaim working storage
 472       IndexSet::reset_memory(C, &live_arena);
 473       ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph
 474       gather_lrg_masks( true );   // Collect intersect mask
 475       live.compute(_lrg_map.max_lrg_id()); // Compute LIVE
 476       _live = &live;
 477     }
 478     build_ifg_physical(&live_arena);
 479     _ifg->SquareUp();
 480     _ifg->Compute_Effective_Degree();
 481     // Only do conservative coalescing if requested
 482     if (OptoCoalesce) {
 483       Compile::TracePhase t3("chaitinCoalesce", &timers[_t_chaitinCoalesce]);
 484       // Conservative (and pessimistic) copy coalescing of those spills
 485       PhaseConservativeCoalesce coalesce(*this);
 486       // If the number of live ranges exceeds the cutoff, don't color the stack.
 487       // This cutoff can be larger than the one below since it is only done once.
 488       coalesce.coalesce_driver();
 489     }
 490     _lrg_map.compress_uf_map_for_nodes();
 491 
 492 #ifdef ASSERT
 493     verify(&live_arena, true);
 494 #endif
 495   } else {
 496     ifg.SquareUp();
 497     ifg.Compute_Effective_Degree();
 498 #ifdef ASSERT
 499     set_was_low();
 500 #endif
 501   }
 502 
 503   // Prepare for Simplify & Select


 519         C->record_method_not_compilable("failed spill-split-recycle sanity check");
 520         return;
 521       }
 522     }
 523 
 524     if (!_lrg_map.max_lrg_id()) {
 525       return;
 526     }
 527     uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
 528     _lrg_map.set_max_lrg_id(new_max_lrg_id);
 529     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
 530     C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");
 531     if (C->failing()) {
 532       return;
 533     }
 534 
 535     compact(); // Compact LRGs; return new lower max lrg
 536 
 537     // Nuke the liveness and interference graph and LiveRanGe info
 538     {
 539       Compile::TracePhase t3("computeLive", &timers[_t_computeLive]);
 540       _live = NULL;
 541       rm.reset_to_mark();         // Reclaim working storage
 542       IndexSet::reset_memory(C, &live_arena);
 543       ifg.init(_lrg_map.max_lrg_id());
 544 
 545       // Create LiveRanGe array.
 546       // Intersect register masks for all USEs and DEFs
 547       gather_lrg_masks(true);
 548       live.compute(_lrg_map.max_lrg_id());
 549       _live = &live;
 550     }
 551     must_spill = build_ifg_physical(&live_arena);
 552     _ifg->SquareUp();
 553     _ifg->Compute_Effective_Degree();
 554 
 555     // Only do conservative coalescing if requested
 556     if (OptoCoalesce) {
 557       Compile::TracePhase t3("chaitinCoalesce", &timers[_t_chaitinCoalesce]);
 558       // Conservative (and pessimistic) copy coalescing
 559       PhaseConservativeCoalesce coalesce(*this);
 560       // Checking for few live ranges determines how aggressive the coalescing is.
 561       coalesce.coalesce_driver();
 562     }
 563     _lrg_map.compress_uf_map_for_nodes();
 564 #ifdef ASSERT
 565     verify(&live_arena, true);
 566 #endif
 567     cache_lrg_info();           // Count degree of LRGs
 568 
 569     // Simplify the InterFerence Graph by removing LRGs of low degree.
 570     // LRGs of low degree are trivially colorable.
 571     Simplify();
 572 
 573     // Select colors by re-inserting LRGs back into the IFG in reverse order.
 574     // Return whether or not something spills.
 575     spills = Select();
 576   }
 577 


1043       int briggs_degree = 0;
1044       IndexSet *s = _ifg->neighbors(i);
1045       IndexSetIterator elements(s);
1046       uint lidx;
1047       while((lidx = elements.next()) != 0) {
1048         if( !lrgs(lidx).lo_degree() )
1049           briggs_degree += MAX2(size,lrgs(lidx).num_regs());
1050       }
1051       if( briggs_degree < lrgs(i).degrees_of_freedom() )
1052         lrgs(i)._was_lo = 1;    // Low degree via the briggs assertion
1053     }
1054     assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
1055   }
1056 #endif
1057 }
1058 
1059 #define REGISTER_CONSTRAINED 16
1060 
1061 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
1062 void PhaseChaitin::cache_lrg_info( ) {
1063   Compile::TracePhase t3("chaitinCacheLRG", &timers[_t_chaitinCacheLRG]);
1064
1065   for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
1066     LRG &lrg = lrgs(i);
1067 
1068     // Check for being of low degree: that means we can be trivially colored.
1069     // Low degree, dead or must-spill guys just get to simplify right away
1070     if( lrg.lo_degree() ||
1071        !lrg.alive() ||
1072         lrg._must_spill ) {
1073       // Split low degree list into those guys that must get a
1074       // register and those that can go to register or stack.
1075       // The idea is LRGs that can go register or stack color first when
1076       // they have a good chance of getting a register.  The register-only
1077       // lo-degree live ranges always get a register.
1078       OptoReg::Name hi_reg = lrg.mask().find_last_elem();
1079       if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
1080         lrg._next = _lo_stk_degree;
1081         _lo_stk_degree = i;
1082       } else {
1083         lrg._next = _lo_degree;


1127     uint neighbor;
1128     while ((neighbor = elements.next()) != 0) {
1129       LRG *n = &lrgs(neighbor);
1130       assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
1131 
1132       // Check for just becoming low-degree
1133       if( n->just_lo_degree() && !n->_has_copy ) {
1134         assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
1135         // Put on lo-degree list
1136         n->_next = lo_no_copy;
1137         lo_no_copy = neighbor;
1138       }
1139     }
1140   } // End of while lo-degree no_copy worklist not empty
1141 
1142   // No more lo-degree no-copy live ranges to simplify
1143 }
1144 
1145 // Simplify the IFG by removing LRGs of low degree.
1146 void PhaseChaitin::Simplify( ) {
1147   Compile::TracePhase t3("chaitinSimplify", &timers[_t_chaitinSimplify]);
1148 
1149   while( 1 ) {                  // Repeat till all is simplified
1150     // May want to explore simplifying lo_degree before _lo_stk_degree.
1151     // This might result in more spills coloring into registers during
1152     // Select().
1153     while( _lo_degree || _lo_stk_degree ) {
1154       // Pull register-only (_lo_degree) first; reg-or-stack guys then color first in Select()
1155       uint lo;
1156       if( _lo_degree ) {
1157         lo = _lo_degree;
1158         _lo_degree = lrgs(lo)._next;
1159       } else {
1160         lo = _lo_stk_degree;
1161         _lo_stk_degree = lrgs(lo)._next;
1162       }
1163 
1164       // Put the simplified guy on the simplified list.
1165       lrgs(lo)._next = _simplified;
1166       _simplified = lo;
1167       // If this guy is "at risk" then mark his current neighbors


1375       !lrg._fat_proj )          // Aligned+adjacent pairs ok
1376     // Use a heuristic to "bias" the color choice
1377     return bias_color(lrg, chunk);
1378 
1379   assert(!lrg._is_vector, "should not be a vector here" );
1380   assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
1381 
1382   // Fat-proj case or misaligned double argument.
1383   assert(lrg.compute_mask_size() == lrg.num_regs() ||
1384          lrg.num_regs() == 2,"fat projs exactly color" );
1385   assert( !chunk, "always color in 1st chunk" );
1386   // Return the highest element in the set.
1387   return lrg.mask().find_last_elem();
1388 }
1389 
1390 // Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
1391 // in reverse order of removal.  As long as nothing of hi-degree was yanked,
1392 // everything going back is guaranteed a color.  Select that color.  If some
1393 // hi-degree LRG cannot get a color then we record that we must spill.
1394 uint PhaseChaitin::Select( ) {
1395   Compile::TracePhase t3("chaitinSelect", &timers[_t_chaitinSelect]);
1396
1397   uint spill_reg = LRG::SPILL_REG;
1398   _max_reg = OptoReg::Name(0);  // Past max register used
1399   while( _simplified ) {
1400     // Pull next LRG from the simplified list - in reverse order of removal
1401     uint lidx = _simplified;
1402     LRG *lrg = &lrgs(lidx);
1403     _simplified = lrg->_next;
1404 
1405 
1406 #ifndef PRODUCT
1407     if (trace_spilling()) {
1408       ttyLocker ttyl;
1409       tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
1410                     lrg->degrees_of_freedom());
1411       lrg->dump();
1412     }
1413 #endif
1414 
1415     // Re-insert into the IFG
1416     _ifg->re_insert(lidx);


1570     lrgs(_lrg_map.find(dst))._was_spilled1 = 1;
1571     if( _spilled_twice.test(src->_idx) ) {
1572       _spilled_twice.set(dst->_idx);
1573       lrgs(_lrg_map.find(dst))._was_spilled2 = 1;
1574     }
1575   }
1576 }
1577 
1578 // Set the 'spilled_once' or 'spilled_twice' flag on a node.
1579 void PhaseChaitin::set_was_spilled( Node *n ) {
1580   if( _spilled_once.test_set(n->_idx) )
1581     _spilled_twice.set(n->_idx);
1582 }
1583 
1584 // Convert Ideal spill instructions into proper FramePtr + offset Loads and
1585 // Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
1586 void PhaseChaitin::fixup_spills() {
1587   // This function does only cisc spill work.
1588   if( !UseCISCSpill ) return;
1589 
1590   Compile::TracePhase t3("fixupSpills", &timers[_t_fixupSpills]);
1591 
1592   // Grab the Frame Pointer
1593   Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
1594 
1595   // For all blocks
1596   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
1597     Block* block = _cfg.get_block(i);
1598 
1599     // For all instructions in block
1600     uint last_inst = block->end_idx();
1601     for (uint j = 1; j <= last_inst; j++) {
1602       Node* n = block->get_node(j);
1603 
1604       // Dead instruction???
1605       assert( n->outcnt() != 0 ||// Nothing dead after post alloc
1606               C->top() == n ||  // Or the random TOP node
1607               n->is_Proj(),     // Or a fat-proj kill node
1608               "No dead instructions after post-alloc" );
1609 
1610       int inp = n->cisc_operand();