src/share/vm/opto/coalesce.cpp

Print this page




 590   while ((neighbor = two.next()) != 0)
 591     if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
 592       lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );
 593 
 594   // Some neighbors of intermediate copies now interfere with the
 595   // combined live range.
 596   IndexSetIterator three(&_ulr);
 597   while ((neighbor = three.next()) != 0)
 598     if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
 599       lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
 600 }
 601 
 602 static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
 603   // Tag copy bias here
 604   if( !ifg->lrgs(lr1)._copy_bias )
 605     ifg->lrgs(lr1)._copy_bias = lr2;
 606   if( !ifg->lrgs(lr2)._copy_bias )
 607     ifg->lrgs(lr2)._copy_bias = lr1;
 608 }
 609 
















































































 610 // See if I can coalesce a series of multiple copies together.  I need the
 611 // final dest copy and the original src copy.  They can be the same Node.
 612 // Compute the compatible register masks.
 613 bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
 614 
 615   if (!dst_copy->is_SpillCopy()) {
 616     return false;
 617   }
 618   if (!src_copy->is_SpillCopy()) {
 619     return false;
 620   }
 621   Node *src_def = src_copy->in(src_copy->is_Copy());
 622   uint lr1 = _phc._lrg_map.find(dst_copy);
 623   uint lr2 = _phc._lrg_map.find(src_def);
 624 
 625   // Same live ranges already?
 626   if (lr1 == lr2) {
 627     return false;
 628   }
 629 


 673 
 674   // Another early bail-out test is when we are double-coalescing and the
 675   // 2 copies are separated by some control flow.
 676   if( dst_copy != src_copy ) {
 677     Block *src_b = _phc._cfg.get_block_for_node(src_copy);
 678     Block *b2 = b;
 679     while( b2 != src_b ) {
 680       if( b2->num_preds() > 2 ){// Found merge-point
 681         _phc._lost_opp_cflow_coalesce++;
 682         // extra record_bias commented out because Chris believes it is not
 683         // productive.  Since we can record only 1 bias, we want to choose one
 684         // that stands a chance of working and this one probably does not.
 685         //record_bias( _phc._lrgs, lr1, lr2 );
 686         return false;           // To hard to find all interferences
 687       }
 688       b2 = _phc._cfg.get_block_for_node(b2->pred(1));
 689     }
 690   }
 691 
 692   // Union the two interference sets together into '_ulr'
 693   uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );
 694 
 695   if( reg_degree >= rm_size ) {
 696     record_bias( _phc._ifg, lr1, lr2 );
 697     return false;
 698   }
 699 
 700   // Now I need to compute all the interferences between dst_copy and
 701   // src_copy.  I'm not willing visit the entire interference graph, so
 702   // I limit my search to things in dst_copy's block or in a straight
 703   // line of previous blocks.  I give up at merge points or when I get
 704   // more interferences than my degree.  I can stop when I find src_copy.
 705   if( dst_copy != src_copy ) {
 706     reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
 707     if( reg_degree == max_juint ) {
 708       record_bias( _phc._ifg, lr1, lr2 );
 709       return false;
 710     }
 711   } // End of if dst_copy & src_copy are different
 712 
 713 


 719   IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
 720   IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);
 721 
 722   // Update the interference graph
 723   update_ifg(lr1, lr2, n_lr1, n_lr2);
 724 
 725   _ulr.remove(lr1);
 726 
 727   // Uncomment the following code to trace Coalescing in great detail.
 728   //
 729   //if (false) {
 730   //  tty->cr();
 731   //  tty->print_cr("#######################################");
 732   //  tty->print_cr("union %d and %d", lr1, lr2);
 733   //  n_lr1->dump();
 734   //  n_lr2->dump();
 735   //  tty->print_cr("resulting set is");
 736   //  _ulr.dump();
 737   //}
 738 
 739   // Replace n_lr1 with the new combined live range.  _ulr will use
 740   // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
 741   // send its internal memory to the free list.
 742   _ulr.swap(n_lr1);
 743   _ulr.clear();
 744   n_lr2->clear();

 745 
 746   lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
 747   lrgs(lr2).set_degree( 0 );
 748 
 749   // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
 750   // union-find tree
 751   union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
 752   // Combine register restrictions
 753   lrgs(lr1).set_mask(rm);
 754   lrgs(lr1).compute_set_mask_size();
 755   lrgs(lr1)._cost += lrgs(lr2)._cost;
 756   lrgs(lr1)._area += lrgs(lr2)._area;
 757 
 758   // While its uncommon to successfully coalesce live ranges that started out
 759   // being not-lo-degree, it can happen.  In any case the combined coalesced
 760   // live range better Simplify nicely.
 761   lrgs(lr1)._was_lo = 1;
 762 
 763   // kinda expensive to do all the time
 764   //tty->print_cr("warning: slow verify happening");




 590   while ((neighbor = two.next()) != 0)
 591     if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
 592       lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );
 593 
 594   // Some neighbors of intermediate copies now interfere with the
 595   // combined live range.
 596   IndexSetIterator three(&_ulr);
 597   while ((neighbor = three.next()) != 0)
 598     if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
 599       lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
 600 }
 601 
 602 static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
 603   // Tag copy bias here
 604   if( !ifg->lrgs(lr1)._copy_bias )
 605     ifg->lrgs(lr1)._copy_bias = lr2;
 606   if( !ifg->lrgs(lr2)._copy_bias )
 607     ifg->lrgs(lr2)._copy_bias = lr1;
 608 }
 609 
 610 //------------------------------lrg_union--------------------------------------
 611 // Compute the union of all elements of one and two which interfere with
 612 // the RegMask mask.  If the degree of the union becomes exceeds
 613 // fail_degree, the union bails out.  The destination set is cleared before
 614 // the union is performed.
 615 
 616 uint PhaseConservativeCoalesce::lrg_union(IndexSet& dst, uint lr1, uint lr2,
 617                          const uint fail_degree,
 618                          const PhaseIFG *ifg,
 619                          const RegMask &mask ) {
 620   IndexSet *one = ifg->neighbors(lr1);
 621   IndexSet *two = ifg->neighbors(lr2);
 622   LRG &lrg1 = ifg->lrgs(lr1);
 623   LRG &lrg2 = ifg->lrgs(lr2);
 624 #ifdef ASSERT
 625 //  assert(_max_elements == one->_max_elements, "max element mismatch");
 626 //  check_watch("union destination");
 627 //  one->check_watch("union source");
 628 //  two->check_watch("union source");
 629 #endif
 630 
 631   // Compute the degree of the combined live-range.  The combined
 632   // live-range has the union of the original live-ranges' neighbors set as
 633   // well as the neighbors of all intermediate copies, minus those neighbors
 634   // that can not use the intersected allowed-register-set.
 635 
 636   // Copy the larger set.  Insert the smaller set into the larger.
 637   if (two->count() > one->count()) {
 638     IndexSet *temp = one;
 639     one = two;
 640     two = temp;
 641   }
 642 
 643   dst.clear();
 644 
 645   // Used to compute degree of register-only interferences.  Infinite-stack
 646   // neighbors do not alter colorability, as they can always color to some
 647   // other color.  (A variant of the Briggs assertion)
 648   uint reg_degree = 0;
 649 
 650   uint element;
 651   // Load up the combined interference set with the neighbors of one
 652   IndexSetIterator elements(one);
 653   while ((element = elements.next()) != 0) {
 654     LRG &lrg = ifg->lrgs(element);
 655     if (mask.overlap(lrg.mask())) {
 656       dst.insert(element);
 657       if( !lrg.mask().is_AllStack() ) {
 658         reg_degree += lrg1.compute_degree(lrg);
 659         if( reg_degree >= fail_degree ) return reg_degree;
 660       } else {
 661         // !!!!! Danger!  No update to reg_degree despite having a neighbor.
 662         // A variant of the Briggs assertion.
 663         // Not needed if I simplify during coalesce, ala George/Appel.
 664         assert( lrg.lo_degree(), "" );
 665       }
 666     }
 667   }
 668   // Add neighbors of two as well
 669   IndexSetIterator elements2(two);
 670   while ((element = elements2.next()) != 0) {
 671     LRG &lrg = ifg->lrgs(element);
 672     if (mask.overlap(lrg.mask())) {
 673       if (dst.insert(element)) {
 674         if( !lrg.mask().is_AllStack() ) {
 675           reg_degree += lrg2.compute_degree(lrg);
 676           if( reg_degree >= fail_degree ) return reg_degree;
 677         } else {
 678           // !!!!! Danger!  No update to reg_degree despite having a neighbor.
 679           // A variant of the Briggs assertion.
 680           // Not needed if I simplify during coalesce, ala George/Appel.
 681           assert( lrg.lo_degree(), "" );
 682         }
 683       }
 684     }
 685   }
 686 
 687   return reg_degree;
 688 }
 689 
 690 // See if I can coalesce a series of multiple copies together.  I need the
 691 // final dest copy and the original src copy.  They can be the same Node.
 692 // Compute the compatible register masks.
 693 bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
 694 
 695   if (!dst_copy->is_SpillCopy()) {
 696     return false;
 697   }
 698   if (!src_copy->is_SpillCopy()) {
 699     return false;
 700   }
 701   Node *src_def = src_copy->in(src_copy->is_Copy());
 702   uint lr1 = _phc._lrg_map.find(dst_copy);
 703   uint lr2 = _phc._lrg_map.find(src_def);
 704 
 705   // Same live ranges already?
 706   if (lr1 == lr2) {
 707     return false;
 708   }
 709 


 753 
 754   // Another early bail-out test is when we are double-coalescing and the
 755   // 2 copies are separated by some control flow.
 756   if( dst_copy != src_copy ) {
 757     Block *src_b = _phc._cfg.get_block_for_node(src_copy);
 758     Block *b2 = b;
 759     while( b2 != src_b ) {
 760       if( b2->num_preds() > 2 ){// Found merge-point
 761         _phc._lost_opp_cflow_coalesce++;
 762         // extra record_bias commented out because Chris believes it is not
 763         // productive.  Since we can record only 1 bias, we want to choose one
 764         // that stands a chance of working and this one probably does not.
 765         //record_bias( _phc._lrgs, lr1, lr2 );
 766         return false;           // To hard to find all interferences
 767       }
 768       b2 = _phc._cfg.get_block_for_node(b2->pred(1));
 769     }
 770   }
 771 
 772   // Union the two interference sets together into '_ulr'
 773   uint reg_degree = lrg_union( _ulr, lr1, lr2, rm_size, _phc._ifg, rm );
 774 
 775   if( reg_degree >= rm_size ) {
 776     record_bias( _phc._ifg, lr1, lr2 );
 777     return false;
 778   }
 779 
 780   // Now I need to compute all the interferences between dst_copy and
 781   // src_copy.  I'm not willing visit the entire interference graph, so
 782   // I limit my search to things in dst_copy's block or in a straight
 783   // line of previous blocks.  I give up at merge points or when I get
 784   // more interferences than my degree.  I can stop when I find src_copy.
 785   if( dst_copy != src_copy ) {
 786     reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
 787     if( reg_degree == max_juint ) {
 788       record_bias( _phc._ifg, lr1, lr2 );
 789       return false;
 790     }
 791   } // End of if dst_copy & src_copy are different
 792 
 793 


 799   IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
 800   IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);
 801 
 802   // Update the interference graph
 803   update_ifg(lr1, lr2, n_lr1, n_lr2);
 804 
 805   _ulr.remove(lr1);
 806 
 807   // Uncomment the following code to trace Coalescing in great detail.
 808   //
 809   //if (false) {
 810   //  tty->cr();
 811   //  tty->print_cr("#######################################");
 812   //  tty->print_cr("union %d and %d", lr1, lr2);
 813   //  n_lr1->dump();
 814   //  n_lr2->dump();
 815   //  tty->print_cr("resulting set is");
 816   //  _ulr.dump();
 817   //}
 818 
 819   // Replace n_lr1 with the new combined live range.
 820   // _ulr should be clean on the next iteration.
 821   n_lr1->set_from(&_ulr);


 822   n_lr2->clear();
 823   _ulr.clear();
 824 
 825   lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
 826   lrgs(lr2).set_degree( 0 );
 827 
 828   // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
 829   // union-find tree
 830   union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
 831   // Combine register restrictions
 832   lrgs(lr1).set_mask(rm);
 833   lrgs(lr1).compute_set_mask_size();
 834   lrgs(lr1)._cost += lrgs(lr2)._cost;
 835   lrgs(lr1)._area += lrgs(lr2)._area;
 836 
 837   // While its uncommon to successfully coalesce live ranges that started out
 838   // being not-lo-degree, it can happen.  In any case the combined coalesced
 839   // live range better Simplify nicely.
 840   lrgs(lr1)._was_lo = 1;
 841 
 842   // kinda expensive to do all the time
 843   //tty->print_cr("warning: slow verify happening");