hotspot Cdiff src/share/vm/opto/loopTransform.cpp

src/share/vm/opto/loopTransform.cpp


*** 981,991 ****
  
  //------------------------------insert_pre_post_loops--------------------------
  // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
  // more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
  // alignment.  Useful to unroll loops that do no array accesses.
! void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
  
  #ifndef PRODUCT
    if (TraceLoopOpts) {
      if (peel_only)
        tty->print("PeelMainPost ");
--- 981,991 ----
  
  //------------------------------insert_pre_post_loops--------------------------
  // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
  // more iterations added.  It acts as a 'peel' only, no lower-bound RCE, no
  // alignment.  Useful to unroll loops that do no array accesses.
! void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {
  
  #ifndef PRODUCT
    if (TraceLoopOpts) {
      if (peel_only)
        tty->print("PeelMainPost ");
*** 996,1009 ****
  #endif
    C->set_major_progress();
  
    // Find common pieces of the loop being guarded with pre & post loops
    CountedLoopNode *main_head = loop->_head->as_CountedLoop();
!   assert( main_head->is_normal_loop(), "" );
    CountedLoopEndNode *main_end = main_head->loopexit();
    guarantee(main_end != NULL, "no loop exit node");
!   assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
    uint dd_main_head = dom_depth(main_head);
    uint max = main_head->outcnt();
  
    Node *pre_header= main_head->in(LoopNode::EntryControl);
    Node *init      = main_head->init_trip();
--- 996,1009 ----
  #endif
    C->set_major_progress();
  
    // Find common pieces of the loop being guarded with pre & post loops
    CountedLoopNode *main_head = loop->_head->as_CountedLoop();
!   assert(main_head->is_normal_loop(), "");
    CountedLoopEndNode *main_end = main_head->loopexit();
    guarantee(main_end != NULL, "no loop exit node");
!   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
    uint dd_main_head = dom_depth(main_head);
    uint max = main_head->outcnt();
  
    Node *pre_header= main_head->in(LoopNode::EntryControl);
    Node *init      = main_head->init_trip();
*** 1013,1179 ****
    Node *cmp       = main_end ->cmp_node();
    BoolTest::mask b_test = main_end->test_trip();
  
    // Need only 1 user of 'bol' because I will be hacking the loop bounds.
    Node *bol = main_end->in(CountedLoopEndNode::TestValue);
!   if( bol->outcnt() != 1 ) {
      bol = bol->clone();
      register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
      _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
    }
    // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
!   if( cmp->outcnt() != 1 ) {
      cmp = cmp->clone();
      register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
      _igvn.replace_input_of(bol, 1, cmp);
    }
  
!   //------------------------------
!   // Step A: Create Post-Loop.
!   Node* main_exit = main_end->proj_out(false);
!   assert( main_exit->Opcode() == Op_IfFalse, "" );
!   int dd_main_exit = dom_depth(main_exit);
! 
!   // Step A1: Clone the loop body.  The clone becomes the post-loop.  The main
!   // loop pre-header illegally has 2 control users (old & new loops).
!   clone_loop( loop, old_new, dd_main_exit );
!   assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
!   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
!   post_head->set_post_loop(main_head);
! 
!   // Reduce the post-loop trip count.
!   CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
!   post_end->_prob = PROB_FAIR;
! 
!   // Build the main-loop normal exit.
!   IfFalseNode *new_main_exit = new IfFalseNode(main_end);
!   _igvn.register_new_node_with_optimizer( new_main_exit );
!   set_idom(new_main_exit, main_end, dd_main_exit );
!   set_loop(new_main_exit, loop->_parent);
! 
!   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
!   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
!   // (the main-loop trip-counter exit value) because we will be changing
!   // the exit value (via unrolling) so we cannot constant-fold away the zero
!   // trip guard until all unrolling is done.
!   Node *zer_opaq = new Opaque1Node(C, incr);
!   Node *zer_cmp  = new CmpINode( zer_opaq, limit );
!   Node *zer_bol  = new BoolNode( zer_cmp, b_test );
!   register_new_node( zer_opaq, new_main_exit );
!   register_new_node( zer_cmp , new_main_exit );
!   register_new_node( zer_bol , new_main_exit );
! 
!   // Build the IfNode
!   IfNode *zer_iff = new IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
!   _igvn.register_new_node_with_optimizer( zer_iff );
!   set_idom(zer_iff, new_main_exit, dd_main_exit);
!   set_loop(zer_iff, loop->_parent);
! 
!   // Plug in the false-path, taken if we need to skip post-loop
!   _igvn.replace_input_of(main_exit, 0, zer_iff);
!   set_idom(main_exit, zer_iff, dd_main_exit);
!   set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
!   // Make the true-path, must enter the post loop
!   Node *zer_taken = new IfTrueNode( zer_iff );
!   _igvn.register_new_node_with_optimizer( zer_taken );
!   set_idom(zer_taken, zer_iff, dd_main_exit);
!   set_loop(zer_taken, loop->_parent);
!   // Plug in the true path
!   _igvn.hash_delete( post_head );
!   post_head->set_req(LoopNode::EntryControl, zer_taken);
!   set_idom(post_head, zer_taken, dd_main_exit);
! 
!   Arena *a = Thread::current()->resource_area();
!   VectorSet visited(a);
!   Node_Stack clones(a, main_head->back_control()->outcnt());
!   // Step A3: Make the fall-in values to the post-loop come from the
!   // fall-out values of the main-loop.
!   for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
!     Node* main_phi = main_head->fast_out(i);
!     if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
!       Node *post_phi = old_new[main_phi->_idx];
!       Node *fallmain  = clone_up_backedge_goo(main_head->back_control(),
!                                               post_head->init_control(),
!                                               main_phi->in(LoopNode::LoopBackControl),
!                                               visited, clones);
!       _igvn.hash_delete(post_phi);
!       post_phi->set_req( LoopNode::EntryControl, fallmain );
!     }
!   }
  
    // Update local caches for next stanza
!   main_exit = new_main_exit;
  
  
    //------------------------------
    // Step B: Create Pre-Loop.
  
    // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
    // loop pre-header illegally has 2 control users (old & new loops).
!   clone_loop( loop, old_new, dd_main_head );
    CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
    CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
    pre_head->set_pre_loop(main_head);
    Node *pre_incr = old_new[incr->_idx];
  
    // Reduce the pre-loop trip count.
    pre_end->_prob = PROB_FAIR;
  
    // Find the pre-loop normal exit.
    Node* pre_exit = pre_end->proj_out(false);
!   assert( pre_exit->Opcode() == Op_IfFalse, "" );
    IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
!   _igvn.register_new_node_with_optimizer( new_pre_exit );
    set_idom(new_pre_exit, pre_end, dd_main_head);
    set_loop(new_pre_exit, loop->_parent);
  
    // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
    // pre-loop, the main-loop may not execute at all.  Later in life this
    // zero-trip guard will become the minimum-trip guard when we unroll
    // the main-loop.
    Node *min_opaq = new Opaque1Node(C, limit);
!   Node *min_cmp  = new CmpINode( pre_incr, min_opaq );
!   Node *min_bol  = new BoolNode( min_cmp, b_test );
!   register_new_node( min_opaq, new_pre_exit );
!   register_new_node( min_cmp , new_pre_exit );
!   register_new_node( min_bol , new_pre_exit );
  
    // Build the IfNode (assume the main-loop is executed always).
!   IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
!   _igvn.register_new_node_with_optimizer( min_iff );
    set_idom(min_iff, new_pre_exit, dd_main_head);
    set_loop(min_iff, loop->_parent);
  
    // Plug in the false-path, taken if we need to skip main-loop
!   _igvn.hash_delete( pre_exit );
    pre_exit->set_req(0, min_iff);
    set_idom(pre_exit, min_iff, dd_main_head);
    set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
    // Make the true-path, must enter the main loop
!   Node *min_taken = new IfTrueNode( min_iff );
!   _igvn.register_new_node_with_optimizer( min_taken );
    set_idom(min_taken, min_iff, dd_main_head);
    set_loop(min_taken, loop->_parent);
    // Plug in the true path
!   _igvn.hash_delete( main_head );
    main_head->set_req(LoopNode::EntryControl, min_taken);
    set_idom(main_head, min_taken, dd_main_head);
  
!   visited.Clear();
!   clones.clear();
    // Step B3: Make the fall-in values to the main-loop come from the
    // fall-out values of the pre-loop.
    for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
      Node* main_phi = main_head->fast_out(i2);
!     if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
        Node *pre_phi = old_new[main_phi->_idx];
        Node *fallpre  = clone_up_backedge_goo(pre_head->back_control(),
                                               main_head->init_control(),
                                               pre_phi->in(LoopNode::LoopBackControl),
                                               visited, clones);
        _igvn.hash_delete(main_phi);
!       main_phi->set_req( LoopNode::EntryControl, fallpre );
      }
    }
  
    // Nodes inside the loop may be control dependent on a predicate
    // that was moved before the preloop. If the back branch of the main
--- 1013,1111 ----
    Node *cmp       = main_end ->cmp_node();
    BoolTest::mask b_test = main_end->test_trip();
  
    // Need only 1 user of 'bol' because I will be hacking the loop bounds.
    Node *bol = main_end->in(CountedLoopEndNode::TestValue);
!   if (bol->outcnt() != 1) {
      bol = bol->clone();
      register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
      _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
    }
    // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
!   if (cmp->outcnt() != 1) {
      cmp = cmp->clone();
      register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
      _igvn.replace_input_of(bol, 1, cmp);
    }
  
!   // Add the post loop
!   PostLoopInfo post_loop_info;
!   insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
  
    // Update local caches for next stanza
!   Node *main_exit = post_loop_info.new_main_exit;
  
  
    //------------------------------
    // Step B: Create Pre-Loop.
  
    // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
    // loop pre-header illegally has 2 control users (old & new loops).
!   clone_loop(loop, old_new, dd_main_head);
    CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
    CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
    pre_head->set_pre_loop(main_head);
    Node *pre_incr = old_new[incr->_idx];
  
    // Reduce the pre-loop trip count.
    pre_end->_prob = PROB_FAIR;
  
    // Find the pre-loop normal exit.
    Node* pre_exit = pre_end->proj_out(false);
!   assert(pre_exit->Opcode() == Op_IfFalse, "");
    IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
!   _igvn.register_new_node_with_optimizer(new_pre_exit);
    set_idom(new_pre_exit, pre_end, dd_main_head);
    set_loop(new_pre_exit, loop->_parent);
  
    // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
    // pre-loop, the main-loop may not execute at all.  Later in life this
    // zero-trip guard will become the minimum-trip guard when we unroll
    // the main-loop.
    Node *min_opaq = new Opaque1Node(C, limit);
!   Node *min_cmp  = new CmpINode(pre_incr, min_opaq);
!   Node *min_bol  = new BoolNode(min_cmp, b_test);
!   register_new_node(min_opaq, new_pre_exit);
!   register_new_node(min_cmp , new_pre_exit);
!   register_new_node(min_bol , new_pre_exit);
  
    // Build the IfNode (assume the main-loop is executed always).
!   IfNode *min_iff = new IfNode(new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN);
!   _igvn.register_new_node_with_optimizer(min_iff);
    set_idom(min_iff, new_pre_exit, dd_main_head);
    set_loop(min_iff, loop->_parent);
  
    // Plug in the false-path, taken if we need to skip main-loop
!   _igvn.hash_delete(pre_exit);
    pre_exit->set_req(0, min_iff);
    set_idom(pre_exit, min_iff, dd_main_head);
    set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
    // Make the true-path, must enter the main loop
!   Node *min_taken = new IfTrueNode(min_iff);
!   _igvn.register_new_node_with_optimizer(min_taken);
    set_idom(min_taken, min_iff, dd_main_head);
    set_loop(min_taken, loop->_parent);
    // Plug in the true path
!   _igvn.hash_delete(main_head);
    main_head->set_req(LoopNode::EntryControl, min_taken);
    set_idom(main_head, min_taken, dd_main_head);
  
!   Arena *a = Thread::current()->resource_area();
!   VectorSet visited(a);
!   Node_Stack clones(a, main_head->back_control()->outcnt());
    // Step B3: Make the fall-in values to the main-loop come from the
    // fall-out values of the pre-loop.
    for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
      Node* main_phi = main_head->fast_out(i2);
!     if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
        Node *pre_phi = old_new[main_phi->_idx];
        Node *fallpre  = clone_up_backedge_goo(pre_head->back_control(),
                                               main_head->init_control(),
                                               pre_phi->in(LoopNode::LoopBackControl),
                                               visited, clones);
        _igvn.hash_delete(main_phi);
!       main_phi->set_req(LoopNode::EntryControl, fallpre);
      }
    }
  
    // Nodes inside the loop may be control dependent on a predicate
    // that was moved before the preloop. If the back branch of the main
*** 1183,1215 ****
    // test that was guarding the loop nest. We add a special CastII on
    // the if branch that enters the loop, between the input induction
    // variable value and the induction variable Phi to preserve correct
    // dependencies.
  
-   // CastII for the post loop:
-   bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
-   assert(inserted, "no castII inserted");
- 
    // CastII for the main loop:
!   inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
    assert(inserted, "no castII inserted");
  
    // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
    // RCE and alignment may change this later.
    Node *cmp_end = pre_end->cmp_node();
!   assert( cmp_end->in(2) == limit, "" );
!   Node *pre_limit = new AddINode( init, stride );
  
    // Save the original loop limit in this Opaque1 node for
    // use by range check elimination.
    Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
  
!   register_new_node( pre_limit, pre_head->in(0) );
!   register_new_node( pre_opaq , pre_head->in(0) );
  
    // Since no other users of pre-loop compare, I can hack limit directly
!   assert( cmp_end->outcnt() == 1, "no other users" );
    _igvn.hash_delete(cmp_end);
    cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
  
    // Special case for not-equal loop bounds:
    // Change pre loop test, main loop test, and the
--- 1115,1143 ----
    // test that was guarding the loop nest. We add a special CastII on
    // the if branch that enters the loop, between the input induction
    // variable value and the induction variable Phi to preserve correct
    // dependencies.
  
    // CastII for the main loop:
!   bool inserted = cast_incr_before_loop(pre_incr, min_taken, main_head);
    assert(inserted, "no castII inserted");
  
    // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
    // RCE and alignment may change this later.
    Node *cmp_end = pre_end->cmp_node();
!   assert(cmp_end->in(2) == limit, "");
!   Node *pre_limit = new AddINode(init, stride);
  
    // Save the original loop limit in this Opaque1 node for
    // use by range check elimination.
    Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
  
!   register_new_node(pre_limit, pre_head->in(0));
!   register_new_node(pre_opaq , pre_head->in(0));
  
    // Since no other users of pre-loop compare, I can hack limit directly
!   assert(cmp_end->outcnt() == 1, "no other users");
    _igvn.hash_delete(cmp_end);
    cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
  
    // Special case for not-equal loop bounds:
    // Change pre loop test, main loop test, and the
*** 1225,1260 ****
  
      BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
      // Modify pre loop end condition
      Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
      BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
!     register_new_node( new_bol0, pre_head->in(0) );
      _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
      // Modify main loop guard condition
      assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
      BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
!     register_new_node( new_bol1, new_pre_exit );
      _igvn.hash_delete(min_iff);
      min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
      // Modify main loop end condition
      BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
      BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
!     register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
      _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
    }
  
    // Flag main loop
    main_head->set_main_loop();
!   if( peel_only ) main_head->set_main_no_pre_loop();
  
    // Subtract a trip count for the pre-loop.
    main_head->set_trip_count(main_head->trip_count() - 1);
  
    // It's difficult to be precise about the trip-counts
    // for the pre/post loops.  They are usually very short,
    // so guess that 4 trips is a reasonable value.
!   post_head->set_profile_trip_cnt(4.0);
    pre_head->set_profile_trip_cnt(4.0);
  
    // Now force out all loop-invariant dominating tests.  The optimizer
    // finds some, but we _know_ they are all useless.
    peeled_dom_test_elim(loop,old_new);
--- 1153,1188 ----
  
      BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
      // Modify pre loop end condition
      Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
      BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
!     register_new_node(new_bol0, pre_head->in(0));
      _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
      // Modify main loop guard condition
      assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
      BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
!     register_new_node(new_bol1, new_pre_exit);
      _igvn.hash_delete(min_iff);
      min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
      // Modify main loop end condition
      BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
      BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
!     register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
      _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
    }
  
    // Flag main loop
    main_head->set_main_loop();
!   if (peel_only) main_head->set_main_no_pre_loop();
  
    // Subtract a trip count for the pre-loop.
    main_head->set_trip_count(main_head->trip_count() - 1);
  
    // It's difficult to be precise about the trip-counts
    // for the pre/post loops.  They are usually very short,
    // so guess that 4 trips is a reasonable value.
!   post_loop_info.post_head->set_profile_trip_cnt(4.0);
    pre_head->set_profile_trip_cnt(4.0);
  
    // Now force out all loop-invariant dominating tests.  The optimizer
    // finds some, but we _know_ they are all useless.
    peeled_dom_test_elim(loop,old_new);
*** 1296,1318 ****
    CountedLoopNode *main_head = loop->_head->as_CountedLoop();
    CountedLoopEndNode *main_end = main_head->loopexit();
    guarantee(main_end != NULL, "no loop exit node");
    // diagnostic to show loop end is not properly formed
    assert(main_end->outcnt() == 2, "1 true, 1 false path only");
-   uint dd_main_head = dom_depth(main_head);
-   uint max = main_head->outcnt();
  
    // mark this loop as processed
    main_head->mark_has_atomic_post_loop();
  
-   Node *pre_header = main_head->in(LoopNode::EntryControl);
-   Node *init = main_head->init_trip();
    Node *incr = main_end->incr();
    Node *limit = main_end->limit();
!   Node *stride = main_end->stride();
!   Node *cmp = main_end->cmp_node();
!   BoolTest::mask b_test = main_end->test_trip();
  
    //------------------------------
    // Step A: Create a new post-Loop.
    Node* main_exit = main_end->proj_out(false);
    assert(main_exit->Opcode() == Op_IfFalse, "");
--- 1224,1308 ----
    CountedLoopNode *main_head = loop->_head->as_CountedLoop();
    CountedLoopEndNode *main_end = main_head->loopexit();
    guarantee(main_end != NULL, "no loop exit node");
    // diagnostic to show loop end is not properly formed
    assert(main_end->outcnt() == 2, "1 true, 1 false path only");
  
    // mark this loop as processed
    main_head->mark_has_atomic_post_loop();
  
    Node *incr = main_end->incr();
    Node *limit = main_end->limit();
! 
!   // In this case we throw away the result as we are not using it to connect anything else.
!   PostLoopInfo post_loop_info;
!   insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
! 
!   // It's difficult to be precise about the trip-counts
!   // for post loops.  They are usually very short,
!   // so guess that unit vector trips is a reasonable value.
!   post_loop_info.post_head->set_profile_trip_cnt(cur_unroll);
! 
!   // Now force out all loop-invariant dominating tests.  The optimizer
!   // finds some, but we _know_ they are all useless.
!   peeled_dom_test_elim(loop, old_new);
!   loop->record_for_igvn();
! }
! 
! 
! //-------------------------insert_scalar_rced_post_loop------------------------
! // Insert a copy of the rce'd main loop as a post loop,
! // We have not unrolled the main loop, so this is the right time to inject this.
! // Later we will examine the partner of this post loop pair which still has range checks
! // to see inject code which tests at runtime if the range checks are applicable.
! void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
!   if (!loop->_head->is_CountedLoop()) return;
! 
!   CountedLoopNode *cl = loop->_head->as_CountedLoop();
! 
!   // only process RCE'd main loops
!   if (cl->loop_has_range_checks() || !cl->is_main_loop()) return;
! 
! #ifndef PRODUCT
!   if (TraceLoopOpts) {
!     tty->print("PostScalarRce  ");
!     loop->dump_head();
!   }
! #endif
!   C->set_major_progress();
! 
!   // Find common pieces of the loop being guarded with pre & post loops
!   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
!   CountedLoopEndNode *main_end = main_head->loopexit();
!   guarantee(main_end != NULL, "no loop exit node");
!   // diagnostic to show loop end is not properly formed
!   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
! 
!   Node *incr = main_end->incr();
!   Node *limit = main_end->limit();
! 
!   // In this case we throw away the result as we are not using it to connect anything else.
!   PostLoopInfo post_loop_info;
!   insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_loop_info);
! 
!   // It's difficult to be precise about the trip-counts
!   // for post loops.  They are usually very short,
!   // so guess that unit vector trips is a reasonable value.
!   post_loop_info.post_head->set_profile_trip_cnt(4.0);
! 
!   // Now force out all loop-invariant dominating tests.  The optimizer
!   // finds some, but we _know_ they are all useless.
!   peeled_dom_test_elim(loop, old_new);
!   loop->record_for_igvn();
! }
! 
! 
! //------------------------------insert_post_loop-------------------------------
! // Insert post loops.  Add a post loop to the given loop passed.
! void PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
!                                       CountedLoopNode *main_head, CountedLoopEndNode *main_end,
!                                       Node *incr, Node *limit, PostLoopInfo &post_loop_info) {
  
    //------------------------------
    // Step A: Create a new post-Loop.
    Node* main_exit = main_end->proj_out(false);
    assert(main_exit->Opcode() == Op_IfFalse, "");
*** 1323,1332 ****
--- 1313,1323 ----
    clone_loop(loop, old_new, dd_main_exit);
    assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
    CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
    post_head->set_normal_loop();
    post_head->set_post_loop(main_head);
+   post_loop_info.post_head = post_head;
  
    // Reduce the post-loop trip count.
    CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
    post_end->_prob = PROB_FAIR;
  
*** 1334,1366 ****
    IfFalseNode *new_main_exit = new IfFalseNode(main_end);
    _igvn.register_new_node_with_optimizer(new_main_exit);
    set_idom(new_main_exit, main_end, dd_main_exit);
    set_loop(new_main_exit, loop->_parent);
  
!   // Step A2: Build a zero-trip guard for the vector post-loop.  After leaving the
!   // main-loop, the vector post-loop may not execute at all.  We 'opaque' the incr
!   // (the vectorized main-loop trip-counter exit value) because we will be changing
    // the exit value (via additional unrolling) so we cannot constant-fold away the zero
    // trip guard until all unrolling is done.
    Node *zer_opaq = new Opaque1Node(C, incr);
    Node *zer_cmp = new CmpINode(zer_opaq, limit);
!   Node *zer_bol = new BoolNode(zer_cmp, b_test);
    register_new_node(zer_opaq, new_main_exit);
    register_new_node(zer_cmp, new_main_exit);
    register_new_node(zer_bol, new_main_exit);
  
    // Build the IfNode
    IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
    _igvn.register_new_node_with_optimizer(zer_iff);
    set_idom(zer_iff, new_main_exit, dd_main_exit);
    set_loop(zer_iff, loop->_parent);
  
!   // Plug in the false-path, taken if we need to skip vector post-loop
    _igvn.replace_input_of(main_exit, 0, zer_iff);
    set_idom(main_exit, zer_iff, dd_main_exit);
    set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
!   // Make the true-path, must enter the vector post loop
    Node *zer_taken = new IfTrueNode(zer_iff);
    _igvn.register_new_node_with_optimizer(zer_taken);
    set_idom(zer_taken, zer_iff, dd_main_exit);
    set_loop(zer_taken, loop->_parent);
    // Plug in the true path
--- 1325,1357 ----
    IfFalseNode *new_main_exit = new IfFalseNode(main_end);
    _igvn.register_new_node_with_optimizer(new_main_exit);
    set_idom(new_main_exit, main_end, dd_main_exit);
    set_loop(new_main_exit, loop->_parent);
  
!   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
!   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
!   // (the previous loop trip-counter exit value) because we will be changing
    // the exit value (via additional unrolling) so we cannot constant-fold away the zero
    // trip guard until all unrolling is done.
    Node *zer_opaq = new Opaque1Node(C, incr);
    Node *zer_cmp = new CmpINode(zer_opaq, limit);
!   Node *zer_bol = new BoolNode(zer_cmp, main_end->test_trip());
    register_new_node(zer_opaq, new_main_exit);
    register_new_node(zer_cmp, new_main_exit);
    register_new_node(zer_bol, new_main_exit);
  
    // Build the IfNode
    IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
    _igvn.register_new_node_with_optimizer(zer_iff);
    set_idom(zer_iff, new_main_exit, dd_main_exit);
    set_loop(zer_iff, loop->_parent);
  
!   // Plug in the false-path, taken if we need to skip this post-loop
    _igvn.replace_input_of(main_exit, 0, zer_iff);
    set_idom(main_exit, zer_iff, dd_main_exit);
    set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
!   // Make the true-path, must enter this post loop
    Node *zer_taken = new IfTrueNode(zer_iff);
    _igvn.register_new_node_with_optimizer(zer_taken);
    set_idom(zer_taken, zer_iff, dd_main_exit);
    set_loop(zer_taken, loop->_parent);
    // Plug in the true path
*** 1369,1379 ****
    set_idom(post_head, zer_taken, dd_main_exit);
  
    Arena *a = Thread::current()->resource_area();
    VectorSet visited(a);
    Node_Stack clones(a, main_head->back_control()->outcnt());
!   // Step A3: Make the fall-in values to the vector post-loop come from the
    // fall-out values of the main-loop.
    for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
      Node* main_phi = main_head->fast_out(i);
      if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
        Node *cur_phi = old_new[main_phi->_idx];
--- 1360,1370 ----
    set_idom(post_head, zer_taken, dd_main_exit);
  
    Arena *a = Thread::current()->resource_area();
    VectorSet visited(a);
    Node_Stack clones(a, main_head->back_control()->outcnt());
!   // Step A3: Make the fall-in values to the post-loop come from the
    // fall-out values of the main-loop.
    for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
      Node* main_phi = main_head->fast_out(i);
      if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0) {
        Node *cur_phi = old_new[main_phi->_idx];
*** 1388,1408 ****
  
    // CastII for the new post loop:
    bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
    assert(inserted, "no castII inserted");
  
!   // It's difficult to be precise about the trip-counts
!   // for post loops.  They are usually very short,
!   // so guess that unit vector trips is a reasonable value.
!   post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);
! 
!   // Now force out all loop-invariant dominating tests.  The optimizer
!   // finds some, but we _know_ they are all useless.
!   peeled_dom_test_elim(loop, old_new);
!   loop->record_for_igvn();
  }
  
  //------------------------------is_invariant-----------------------------
  // Return true if n is invariant
  bool IdealLoopTree::is_invariant(Node* n) const {
    Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
    if (n_c->is_top()) return false;
--- 1379,1392 ----
  
    // CastII for the new post loop:
    bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
    assert(inserted, "no castII inserted");
  
!   post_loop_info.new_main_exit = new_main_exit;
  }
  
+ 
  //------------------------------is_invariant-----------------------------
  // Return true if n is invariant
  bool IdealLoopTree::is_invariant(Node* n) const {
    Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
    if (n_c->is_top()) return false;
*** 2095,2105 ****
    return false;
  }
  
  //------------------------------do_range_check---------------------------------
  // Eliminate range-checks and other trip-counter vs loop-invariant tests.
! void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
  #ifndef PRODUCT
    if (PrintOpto && VerifyLoopOptimizations) {
      tty->print("Range Check Elimination ");
      loop->dump_head();
    } else if (TraceLoopOpts) {
--- 2079,2089 ----
    return false;
  }
  
  //------------------------------do_range_check---------------------------------
  // Eliminate range-checks and other trip-counter vs loop-invariant tests.
! void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &range_check_list) {
  #ifndef PRODUCT
    if (PrintOpto && VerifyLoopOptimizations) {
      tty->print("Range Check Elimination ");
      loop->dump_head();
    } else if (TraceLoopOpts) {
*** 2139,2151 ****
  
    // Find the pre-loop limit; we will expand its iterations to
    // not ever trip low tests.
    Node *p_f = iffm->in(0);
    // pre loop may have been optimized out
!   if (p_f->Opcode() != Op_IfFalse) {
      return;
!   }
    CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
    assert(pre_end->loopnode()->is_pre_loop(), "");
    Node *pre_opaq1 = pre_end->limit();
    // Occasionally it's possible for a pre-loop Opaque1 node to be
    // optimized away and then another round of loop opts attempted.
--- 2123,2135 ----
  
    // Find the pre-loop limit; we will expand its iterations to
    // not ever trip low tests.
    Node *p_f = iffm->in(0);
    // pre loop may have been optimized out
!   if (p_f->Opcode() != Op_IfFalse)
      return;
! 
    CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
    assert(pre_end->loopnode()->is_pre_loop(), "");
    Node *pre_opaq1 = pre_end->limit();
    // Occasionally it's possible for a pre-loop Opaque1 node to be
    // optimized away and then another round of loop opts attempted.
*** 2183,2209 ****
    // executed.
    bool conditional_rc = false;
  
    // Check loop body for tests of trip-counter plus loop-invariant vs
    // loop-invariant.
!   for( uint i = 0; i < loop->_body.size(); i++ ) {
      Node *iff = loop->_body[i];
      if (iff->Opcode() == Op_If ||
          iff->Opcode() == Op_RangeCheck) { // Test?
        // Test is an IfNode, has 2 projections.  If BOTH are in the loop
        // we need loop unswitching instead of iteration splitting.
        Node *exit = loop->is_loop_exit(iff);
!       if( !exit ) continue;
        int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
  
        // Get boolean condition to test
        Node *i1 = iff->in(1);
!       if( !i1->is_Bool() ) continue;
        BoolNode *bol = i1->as_Bool();
        BoolTest b_test = bol->_test;
        // Flip sense of test if exit condition is flipped
!       if( flip )
          b_test = b_test.negate();
  
        // Get compare
        Node *cmp = bol->in(1);
  
--- 2167,2194 ----
    // executed.
    bool conditional_rc = false;
  
    // Check loop body for tests of trip-counter plus loop-invariant vs
    // loop-invariant.
!   for (uint i = 0; i < loop->_body.size(); i++) {
      Node *iff = loop->_body[i];
      if (iff->Opcode() == Op_If ||
          iff->Opcode() == Op_RangeCheck) { // Test?
        // Test is an IfNode, has 2 projections.  If BOTH are in the loop
        // we need loop unswitching instead of iteration splitting.
+       range_check_list.push(iff);
        Node *exit = loop->is_loop_exit(iff);
!       if (!exit) continue;
        int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
  
        // Get boolean condition to test
        Node *i1 = iff->in(1);
!       if (!i1->is_Bool()) continue;
        BoolNode *bol = i1->as_Bool();
        BoolTest b_test = bol->_test;
        // Flip sense of test if exit condition is flipped
!       if (flip)
          b_test = b_test.negate();
  
        // Get compare
        Node *cmp = bol->in(1);
  
*** 2211,2255 ****
        Node *rc_exp = cmp->in(1);
        Node *limit  = cmp->in(2);
        jint scale_con= 1;        // Assume trip counter not scaled
  
        Node *limit_c = get_ctrl(limit);
!       if( loop->is_member(get_loop(limit_c) ) ) {
          // Compare might have operands swapped; commute them
          b_test = b_test.commute();
          rc_exp = cmp->in(2);
          limit  = cmp->in(1);
          limit_c = get_ctrl(limit);
!         if( loop->is_member(get_loop(limit_c) ) )
            continue;             // Both inputs are loop varying; cannot RCE
        }
        // Here we know 'limit' is loop invariant
  
        // 'limit' maybe pinned below the zero trip test (probably from a
        // previous round of rce), in which case, it can't be used in the
        // zero trip test expression which must occur before the zero test's if.
!       if( limit_c == ctrl ) {
          continue;  // Don't rce this check but continue looking for other candidates.
-       }
  
        // Check for scaled induction variable plus an offset
        Node *offset = NULL;
  
!       if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
          continue;
-       }
  
        Node *offset_c = get_ctrl(offset);
!       if( loop->is_member( get_loop(offset_c) ) )
          continue;               // Offset is not really loop invariant
        // Here we know 'offset' is loop invariant.
  
        // As above for the 'limit', the 'offset' maybe pinned below the
        // zero trip test.
!       if( offset_c == ctrl ) {
          continue; // Don't rce this check but continue looking for other candidates.
!       }
  #ifdef ASSERT
        if (TraceRangeLimitCheck) {
          tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
          bol->dump(2);
        }
--- 2196,2238 ----
        Node *rc_exp = cmp->in(1);
        Node *limit  = cmp->in(2);
        jint scale_con= 1;        // Assume trip counter not scaled
  
        Node *limit_c = get_ctrl(limit);
!       if (loop->is_member(get_loop(limit_c))) {
          // Compare might have operands swapped; commute them
          b_test = b_test.commute();
          rc_exp = cmp->in(2);
          limit  = cmp->in(1);
          limit_c = get_ctrl(limit);
!         if (loop->is_member(get_loop(limit_c)))
            continue;             // Both inputs are loop varying; cannot RCE
        }
        // Here we know 'limit' is loop invariant
  
        // 'limit' maybe pinned below the zero trip test (probably from a
        // previous round of rce), in which case, it can't be used in the
        // zero trip test expression which must occur before the zero test's if.
!       if (limit_c == ctrl)
          continue;  // Don't rce this check but continue looking for other candidates.
  
        // Check for scaled induction variable plus an offset
        Node *offset = NULL;
  
!       if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset))
          continue;
  
        Node *offset_c = get_ctrl(offset);
!       if (loop->is_member( get_loop(offset_c)))
          continue;               // Offset is not really loop invariant
        // Here we know 'offset' is loop invariant.
  
        // As above for the 'limit', the 'offset' maybe pinned below the
        // zero trip test.
!       if (offset_c == ctrl)
          continue; // Don't rce this check but continue looking for other candidates.
! 
  #ifdef ASSERT
        if (TraceRangeLimitCheck) {
          tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
          bol->dump(2);
        }
*** 2260,2271 ****
        // monotonically increases by stride_con, a constant.  Both (or either)
        // stride_con and scale_con can be negative which will flip about the
        // sense of the test.
  
        // Adjust pre and main loop limits to guard the correct iteration set
!       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
!         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
            // The underflow and overflow limits: 0 <= scale*I+offset < limit
            add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
            if (!conditional_rc) {
              // (0-offset)/scale could be outside of loop iterations range.
              conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
--- 2243,2254 ----
        // monotonically increases by stride_con, a constant.  Both (or either)
        // stride_con and scale_con can be negative which will flip about the
        // sense of the test.
  
        // Adjust pre and main loop limits to guard the correct iteration set
!       if (cmp->Opcode() == Op_CmpU) {// Unsigned compare is really 2 tests
!         if (b_test._test == BoolTest::lt) { // Range checks always use lt
            // The underflow and overflow limits: 0 <= scale*I+offset < limit
            add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
            if (!conditional_rc) {
              // (0-offset)/scale could be outside of loop iterations range.
              conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
*** 2281,2307 ****
          case BoolTest::gt:
            // Fall into GE case
          case BoolTest::ge:
            // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
            scale_con = -scale_con;
!           offset = new SubINode( zero, offset );
!           register_new_node( offset, pre_ctrl );
!           limit  = new SubINode( zero, limit  );
!           register_new_node( limit, pre_ctrl );
            // Fall into LE case
          case BoolTest::le:
            if (b_test._test != BoolTest::gt) {
              // Convert X <= Y to X < Y+1
!             limit = new AddINode( limit, one );
!             register_new_node( limit, pre_ctrl );
            }
            // Fall into LT case
          case BoolTest::lt:
            // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
            // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
            // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
!           add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
            if (!conditional_rc) {
              // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
              // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
              // still be outside of loop range.
              conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
--- 2264,2290 ----
          case BoolTest::gt:
            // Fall into GE case
          case BoolTest::ge:
            // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
            scale_con = -scale_con;
!           offset = new SubINode(zero, offset);
!           register_new_node(offset, pre_ctrl);
!           limit  = new SubINode(zero, limit);
!           register_new_node(limit, pre_ctrl);
            // Fall into LE case
          case BoolTest::le:
            if (b_test._test != BoolTest::gt) {
              // Convert X <= Y to X < Y+1
!             limit = new AddINode(limit, one);
!             register_new_node(limit, pre_ctrl);
            }
            // Fall into LT case
          case BoolTest::lt:
            // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
            // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
            // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
!           add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
            if (!conditional_rc) {
              // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
              // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
              // still be outside of loop range.
              conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
*** 2315,2325 ****
          }
        }
  
        // Kill the eliminated test
        C->set_major_progress();
!       Node *kill_con = _igvn.intcon( 1-flip );
        set_ctrl(kill_con, C->root());
        _igvn.replace_input_of(iff, 1, kill_con);
        // Find surviving projection
        assert(iff->is_If(), "");
        ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
--- 2298,2308 ----
          }
        }
  
        // Kill the eliminated test
        C->set_major_progress();
!       Node *kill_con = _igvn.intcon(1-flip);
        set_ctrl(kill_con, C->root());
        _igvn.replace_input_of(iff, 1, kill_con);
        // Find surviving projection
        assert(iff->is_If(), "");
        ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
*** 2332,2341 ****
--- 2315,2325 ----
            _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
            --i;
            --imax;
          }
        }
+       range_check_list.pop();
  
      } // End of is IF
  
    }
  
*** 2371,2398 ****
    }
  
    Node *main_cle = cl->loopexit();
    Node *main_bol = main_cle->in(1);
    // Hacking loop bounds; need private copies of exit test
!   if( main_bol->outcnt() > 1 ) {// BoolNode shared?
      main_bol = main_bol->clone();// Clone a private BoolNode
      register_new_node( main_bol, main_cle->in(0) );
      _igvn.replace_input_of(main_cle, 1, main_bol);
    }
    Node *main_cmp = main_bol->in(1);
!   if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
      main_cmp = main_cmp->clone();// Clone a private CmpNode
!     register_new_node( main_cmp, main_cle->in(0) );
      _igvn.replace_input_of(main_bol, 1, main_cmp);
    }
    // Hack the now-private loop bounds
    _igvn.replace_input_of(main_cmp, 2, main_limit);
    // The OpaqueNode is unshared by design
    assert( opqzm->outcnt() == 1, "cannot hack shared node" );
    _igvn.replace_input_of(opqzm, 1, main_limit);
  }
  
  //------------------------------DCE_loop_body----------------------------------
  // Remove simplistic dead code from loop body
  void IdealLoopTree::DCE_loop_body() {
    for( uint i = 0; i < _body.size(); i++ )
      if( _body.at(i)->outcnt() == 0 )
--- 2355,2542 ----
    }
  
    Node *main_cle = cl->loopexit();
    Node *main_bol = main_cle->in(1);
    // Hacking loop bounds; need private copies of exit test
!   if (main_bol->outcnt() > 1) {// BoolNode shared?
      main_bol = main_bol->clone();// Clone a private BoolNode
      register_new_node( main_bol, main_cle->in(0) );
      _igvn.replace_input_of(main_cle, 1, main_bol);
    }
    Node *main_cmp = main_bol->in(1);
!   if (main_cmp->outcnt() > 1) { // CmpNode shared?
      main_cmp = main_cmp->clone();// Clone a private CmpNode
!     register_new_node(main_cmp, main_cle->in(0));
      _igvn.replace_input_of(main_bol, 1, main_cmp);
    }
    // Hack the now-private loop bounds
    _igvn.replace_input_of(main_cmp, 2, main_limit);
    // The OpaqueNode is unshared by design
    assert( opqzm->outcnt() == 1, "cannot hack shared node" );
    _igvn.replace_input_of(opqzm, 1, main_limit);
  }
  
+ //------------------------------has_range_checks-------------------------------
+ // Check to see if RCE cleaned the current loop of range-checks.
+ void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
+   assert(RangeCheckElimination, "");
+   CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ 
+   // skip this loop if it is already marked
+   if (cl->loop_has_range_checks()) return;
+ 
+   if (cl->is_main_loop() || cl->is_post_loop()) {
+     // Now check for existance of range checks
+     for (uint i = 0; i < loop->_body.size(); i++) {
+       Node *iff = loop->_body[i];
+       if (iff->Opcode() == Op_If ||
+         iff->Opcode() == Op_RangeCheck) {
+         cl->mark_has_range_checks();
+         break;
+       }
+     }
+   }
+ }
+ 
+ //-------------------------multi_version_post_loops----------------------------
+ // Check the range checks that remain, if simple, use the bounds to guard
+ // which version to a post loop we execute, one with range checks or one without
+ bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop) {
+   bool multi_version_succeeded = false;
+   assert(RangeCheckElimination, "");
+   CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
+   assert(legacy_cl->is_post_loop(), "");
+ 
+   // Check for existance of range checks using the unique instance to make a guard with
+   Unique_Node_List worklist;
+   for (uint i = 0; i < legacy_loop->_body.size(); i++) {
+     Node *iff = legacy_loop->_body[i];
+     if (iff->Opcode() == Op_If || iff->Opcode() == Op_RangeCheck) {
+       worklist.push(iff);
+     }
+   }
+ 
+   // Find RCE'd post loop so that we can stage its guard.
+   Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
+   if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
+   Node* iffm = ctrl->in(0);
+   if (!iffm->is_If()) return multi_version_succeeded;
+   Node* cur_bool = iffm->in(1);
+   if (!cur_bool->is_Bool()) return multi_version_succeeded;
+   Node* cur_cmp = cur_bool->in(1);
+   if (!cur_cmp->is_Cmp()) return multi_version_succeeded;
+   Node* cur_opq = cur_cmp->in(1);
+   // Can not optimize a loop if zero-trip Opaque1 node is optimized away.
+   if (cur_opq->Opcode() != Op_Opaque1) return multi_version_succeeded;
+ 
+   // Now we test that both the post loops are connected
+   Node* post_loop_region = iffm->in(0);
+   if (post_loop_region == NULL) return multi_version_succeeded;
+   if (!post_loop_region->is_Region()) return multi_version_succeeded;
+   Node* covering_region = post_loop_region->in(RegionNode::Control+1);
+   if (covering_region == NULL) return multi_version_succeeded;
+   if (!covering_region->is_Region()) return multi_version_succeeded;
+   Node* p_f = covering_region->in(RegionNode::Control);
+   if (p_f == NULL) return multi_version_succeeded;
+   if (!p_f->is_IfFalse()) return multi_version_succeeded;
+   if (!p_f->in(0)->is_CountedLoopEnd()) return multi_version_succeeded;
+   CountedLoopEndNode* rce_loop_end = p_f->in(0)->as_CountedLoopEnd();
+   if (rce_loop_end == NULL) return multi_version_succeeded;
+   CountedLoopNode* rce_cl = rce_loop_end->loopnode();
+   if (rce_cl == NULL || !rce_cl->is_post_loop()) return multi_version_succeeded;
+   CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
+   if (rce_cl != known_rce_cl) return multi_version_succeeded;
+ 
+   // Then we fetch the cover entry test
+   ctrl = rce_cl->in(LoopNode::EntryControl);
+   if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
+ 
+ #ifndef PRODUCT
+   if (TraceLoopOpts) {
+     tty->print("PostMultiVersion\n");
+     rce_loop->dump_head();
+     legacy_loop->dump_head();
+   }
+ #endif
+ 
+   // Now fetch the limit we want to compare against
+   Node *limit = rce_cl->limit();
+   bool first_time = true;
+ 
+   // If we got this far, we identified the post loop which has been RCE'd and
+   // we have a work list.  Now we will try to transform the if guard to cause
+   // the loop pair to be multi version executed with the determination left to runtime
+   // or the optimizer if full information is known about the given arrays at compile time.
+   Node *last_min = NULL;
+   while (worklist.size()) {
+     Node* rc_iffm = worklist.pop();
+     if (rc_iffm->is_If()) {
+       Node *rc_bolzm = rc_iffm->in(1);
+       if (rc_bolzm->is_Bool()) {
+         Node *rc_cmpzm = rc_bolzm->in(1);
+         if (rc_cmpzm->is_Cmp()) {
+           Node *rc_left = rc_cmpzm->in(2);
+           if (first_time) {
+             last_min = rc_left;
+             first_time = false;
+           } else {
+             Node *cur_min = new MinINode(last_min, rc_left);
+             last_min = cur_min;
+             _igvn.register_new_node_with_optimizer(last_min);
+           }
+         }
+       }
+     }
+   }
+ 
+   // All we have to do is update the limit of the rce loop
+   // with the min of our expression and the current limit.
+   // We will use this expression to replace the current limit.
+   if (last_min) {
+     Node *cur_min = new MinINode(last_min, limit);
+     _igvn.register_new_node_with_optimizer(cur_min);
+     Node *cmp_node = rce_loop_end->cmp_node();
+     _igvn.replace_input_of(cmp_node, 2, cur_min);
+     set_idom(cmp_node, cur_min, dom_depth(ctrl));
+     set_ctrl(cur_min, ctrl);
+     set_loop(cur_min, rce_loop->_parent);
+ 
+     legacy_cl->mark_is_multiversioned();
+     rce_cl->mark_is_multiversioned();
+     multi_version_succeeded = true;
+ 
+     C->set_major_progress();
+   }
+ 
+   return multi_version_succeeded;
+ }
+ 
+ //-------------------------poison_rce_post_loop--------------------------------
+ // Causes the rce'd post loop to be optimized away if multiverioning fails
+ void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
+   CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
+   Node* ctrl = rce_cl->in(LoopNode::EntryControl);
+   if (ctrl->is_IfTrue() || ctrl->is_IfFalse()) {
+     Node* iffm = ctrl->in(0);
+     if (iffm->is_If()) {
+       Node* cur_bool = iffm->in(1);
+       if (cur_bool->is_Bool()) {
+         Node* cur_cmp = cur_bool->in(1);
+         if (cur_cmp->is_Cmp()) {
+           BoolTest::mask new_test = BoolTest::gt;
+           BoolNode *new_bool = new BoolNode(cur_cmp, new_test);
+           _igvn.replace_node(cur_bool, new_bool);
+           _igvn._worklist.push(new_bool);
+           Node* left_op = cur_cmp->in(1);
+           _igvn.replace_input_of(cur_cmp, 2, left_op);
+           C->set_major_progress();
+         }
+       }
+     }
+   }
+ }
+ 
  //------------------------------DCE_loop_body----------------------------------
  // Remove simplistic dead code from loop body
  void IdealLoopTree::DCE_loop_body() {
    for( uint i = 0; i < _body.size(); i++ )
      if( _body.at(i)->outcnt() == 0 )
*** 2746,2757 ****
        phase->insert_pre_post_loops(this,old_new, !may_rce_align);
  
      // Adjust the pre- and main-loop limits to let the pre and post loops run
      // with full checks, but the main-loop with no checks.  Remove said
      // checks from the main body.
!     if (should_rce)
!       phase->do_range_check(this,old_new);
  
      // Double loop body for unrolling.  Adjust the minimum-trip test (will do
      // twice as many iterations as before) and the main body limit (only do
      // an even number of trips).  If we are peeling, we might enable some RCE
      // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
--- 2890,2912 ----
        phase->insert_pre_post_loops(this,old_new, !may_rce_align);
  
      // Adjust the pre- and main-loop limits to let the pre and post loops run
      // with full checks, but the main-loop with no checks.  Remove said
      // checks from the main body.
!     if (should_rce) {
!       Node_List range_check_list;
!       phase->do_range_check(this, range_check_list);
!       if (range_check_list.size() > 0) {
!         cl->mark_has_range_checks();
!       }
!     }
! 
!     if (should_rce && should_unroll && !should_peel && PostLoopMultiversioning) {
!       // Try to setup multiversioning on main loops before they are unrolled
!       if (cl->is_main_loop() && (cl->unrolled_count() == 1))
!         phase->insert_scalar_rced_post_loop(this, old_new);
!     }
  
      // Double loop body for unrolling.  Adjust the minimum-trip test (will do
      // twice as many iterations as before) and the main body limit (only do
      // an even number of trips).  If we are peeling, we might enable some RCE
      // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
< prev index next >