JDK-8022284 Cdiff src/share/vm/opto/gcm.cpp
*** 64,87 ****
//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
// Set basic block of n, add n to b.
! _bbs.map(n->_idx, b);
b->add_inst(n);
// After Matching, nearly any old Node may have projections trailing it.
// These are usually machine-dependent flags. In any case, they might
// float to another block below this one. Move them up.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_Proj()) {
! Block* buse = _bbs[use->_idx];
if (buse != b) { // In wrong block?
! if (buse != NULL)
buse->find_remove(use); // Remove from wrong block
! _bbs.map(use->_idx, b); // Re-insert in this block
b->add_inst(use);
}
}
}
}
--- 64,88 ----
//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
// Set basic block of n, add n to b.
! map_node_to_block(n, b);
b->add_inst(n);
// After Matching, nearly any old Node may have projections trailing it.
// These are usually machine-dependent flags. In any case, they might
// float to another block below this one. Move them up.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_Proj()) {
! Block* buse = get_block_for_node(use);
if (buse != b) { // In wrong block?
! if (buse != NULL) {
buse->find_remove(use); // Remove from wrong block
! }
! map_node_to_block(use, b);
b->add_inst(use);
}
}
}
}
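
Every hunk in this change follows the same pattern: direct indexing of the _bbs Block_Array is replaced by named accessors on PhaseCFG. The accessor definitions live in block.hpp, not in this file; the following is a minimal sketch of what they presumably look like, reconstructed from the call sites in this diff (the field name _node_to_block_mapping and its exact placement in the class are assumptions):

// Hypothetical sketch of the PhaseCFG accessors used throughout this diff.
// Reconstructed from call sites; the real definitions are in block.hpp and
// the field name _node_to_block_mapping is assumed.
class PhaseCFG : public Phase {
 private:
  Block_Array _node_to_block_mapping;  // maps node->_idx to its Block*
 public:
  // Record that 'node' lives in 'block'.
  void map_node_to_block(const Node* node, Block* block) {
    _node_to_block_mapping.map(node->_idx, block);
  }
  // Forget the block assignment for 'node' (used for proj_list entries).
  void unmap_node_from_block(const Node* node) {
    _node_to_block_mapping.map(node->_idx, NULL);
  }
  // Return the block containing 'node', or NULL if none has been assigned.
  Block* get_block_for_node(const Node* node) const {
    return _node_to_block_mapping[node->_idx];
  }
  // True if 'node' has already been assigned to a block.
  bool has_block(const Node* node) const {
    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
  }
};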
*** 95,105 ****
assert(in0 != NULL, "Only control-dependent");
const Node *p = in0->is_block_proj();
if (p != NULL && p != n) { // Control from a block projection?
assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
// Find trailing Region
! Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
uint j = 0;
if (pb->_num_succs != 1) { // More than 1 successor?
// Search for successor
uint max = pb->_nodes.size();
assert( max > 1, "" );
--- 96,106 ----
assert(in0 != NULL, "Only control-dependent");
const Node *p = in0->is_block_proj();
if (p != NULL && p != n) { // Control from a block projection?
assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
// Find trailing Region
! Block *pb = get_block_for_node(in0); // Block-projection already has basic block
uint j = 0;
if (pb->_num_succs != 1) { // More than 1 successor?
// Search for successor
uint max = pb->_nodes.size();
assert( max > 1, "" );
*** 125,142 ****
GrowableArray <Node *> spstack(C->unique()+8);
spstack.push(_root);
while ( spstack.is_nonempty() ) {
Node *n = spstack.pop();
if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
! if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down!
assert( n->in(0), "pinned Node must have Control" );
// Before setting block replace block_proj control edge
replace_block_proj_ctrl(n);
Node *input = n->in(0);
! while( !input->is_block_start() )
input = input->in(0);
! Block *b = _bbs[input->_idx]; // Basic block of controlling input
schedule_node_into_block(n, b);
}
for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
if( n->in(i) != NULL )
spstack.push(n->in(i));
--- 126,144 ----
GrowableArray <Node *> spstack(C->unique()+8);
spstack.push(_root);
while ( spstack.is_nonempty() ) {
Node *n = spstack.pop();
if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
! if( n->pinned() && !has_block(n)) { // Pinned? Nail it down!
assert( n->in(0), "pinned Node must have Control" );
// Before setting block replace block_proj control edge
replace_block_proj_ctrl(n);
Node *input = n->in(0);
! while (!input->is_block_start()) {
input = input->in(0);
! }
! Block *b = get_block_for_node(input); // Basic block of controlling input
schedule_node_into_block(n, b);
}
for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
if( n->in(i) != NULL )
spstack.push(n->in(i));
*** 147,157 ****
#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
! static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
if (b1 == NULL) return;
assert(b1->_dom_depth < b2->_dom_depth, "sanity");
Block* tmp = b2;
while (tmp != b1 && tmp != NULL) {
tmp = tmp->_idom;
--- 149,159 ----
#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
! static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
if (b1 == NULL) return;
assert(b1->_dom_depth < b2->_dom_depth, "sanity");
Block* tmp = b2;
while (tmp != b1 && tmp != NULL) {
tmp = tmp->_idom;
*** 160,170 ****
// Detected an unschedulable graph. Print some nice stuff and die.
tty->print_cr("!!! Unschedulable graph !!!");
for (uint j=0; j<n->len(); j++) { // For all inputs
Node* inn = n->in(j); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
! Block* inb = bbs[inn->_idx];
tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
inn->dump();
}
tty->print("Failing node: ");
--- 162,172 ----
// Detected an unschedulable graph. Print some nice stuff and die.
tty->print_cr("!!! Unschedulable graph !!!");
for (uint j=0; j<n->len(); j++) { // For all inputs
Node* inn = n->in(j); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
! Block* inb = cfg->get_block_for_node(inn);
tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
inn->dump();
}
tty->print("Failing node: ");
*** 172,195 ****
assert(false, "unschedulable graph");
}
}
#endif
! static Block* find_deepest_input(Node* n, Block_Array &bbs) {
// Find the last input dominated by all other inputs.
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (uint k = 0; k < n->len(); k++) { // For all inputs
Node* inn = n->in(k); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
! Block* inb = bbs[inn->_idx];
assert(inb != NULL, "must already have scheduled this input");
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
! DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
assert(deepb != NULL, "must be at least one input to n");
--- 174,197 ----
assert(false, "unschedulable graph");
}
}
#endif
! static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
// Find the last input dominated by all other inputs.
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (uint k = 0; k < n->len(); k++) { // For all inputs
Node* inn = n->in(k); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
! Block* inb = cfg->get_block_for_node(inn);
assert(inb != NULL, "must already have scheduled this input");
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
! DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
assert(deepb != NULL, "must be at least one input to n");
*** 241,251 ****
while (i < n->len()) { // For all inputs
Node *in = n->in(i); // Get input
++i;
if (in == NULL) continue; // Ignore NULL, missing inputs
int is_visited = visited.test_set(in->_idx);
! if (!_bbs.lookup(in->_idx)) { // Missing block selection?
if (is_visited) {
// assert( !visited.test(in->_idx), "did not schedule early" );
return false;
}
nstack.push(n, i); // Save parent node and next input's index.
--- 243,253 ----
while (i < n->len()) { // For all inputs
Node *in = n->in(i); // Get input
++i;
if (in == NULL) continue; // Ignore NULL, missing inputs
int is_visited = visited.test_set(in->_idx);
! if (!has_block(in)) { // Missing block selection?
if (is_visited) {
// assert( !visited.test(in->_idx), "did not schedule early" );
return false;
}
nstack.push(n, i); // Save parent node and next input's index.
*** 263,275 ****
// Some instructions are pinned into a block. These include Region,
// Phi, Start, Return, and other control-dependent instructions and
// any projections which depend on them.
if (!n->pinned()) {
// Set earliest legal block.
! _bbs.map(n->_idx, find_deepest_input(n, _bbs));
} else {
! assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
}
if (nstack.is_empty()) {
// Finished all nodes on stack.
// Process next node on the worklist 'roots'.
--- 265,277 ----
// Some instructions are pinned into a block. These include Region,
// Phi, Start, Return, and other control-dependent instructions and
// any projections which depend on them.
if (!n->pinned()) {
// Set earliest legal block.
! map_node_to_block(n, find_deepest_input(n, this));
} else {
! assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
}
if (nstack.is_empty()) {
// Finished all nodes on stack.
// Process next node on the worklist 'roots'.
*** 311,322 ****
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use. If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
! static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
! Block* buse = bbs[use->_idx];
if (buse == NULL) return LCA; // Unused killing Projs have no use block
if (!use->is_Phi()) return buse->dom_lca(LCA);
uint pmax = use->req(); // Number of Phi inputs
// Why doesn't this loop just break after finding the matching input to
// the Phi? Well...it's like this. I do not have true def-use/use-def
--- 313,324 ----
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use. If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
! static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
! Block* buse = cfg->get_block_for_node(use);
if (buse == NULL) return LCA; // Unused killing Projs have no use block
if (!use->is_Phi()) return buse->dom_lca(LCA);
uint pmax = use->req(); // Number of Phi inputs
// Why doesn't this loop just break after finding the matching input to
// the Phi? Well...it's like this. I do not have true def-use/use-def
*** 327,337 ****
// which use-def edge I should find the predecessor block for. So I find
// them all. Means I do a little extra work if a Phi uses the same value
// more than once.
for (uint j=1; j<pmax; j++) { // For all inputs
if (use->in(j) == def) { // Found matching input?
! Block* pred = bbs[buse->pred(j)->_idx];
LCA = pred->dom_lca(LCA);
}
}
return LCA;
}
--- 329,339 ----
// which use-def edge I should find the predecessor block for. So I find
// them all. Means I do a little extra work if a Phi uses the same value
// more than once.
for (uint j=1; j<pmax; j++) { // For all inputs
if (use->in(j) == def) { // Found matching input?
! Block* pred = cfg->get_block_for_node(buse->pred(j));
LCA = pred->dom_lca(LCA);
}
}
return LCA;
}
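
Both branches above fold the use's block into the running answer with Block::dom_lca, which is defined outside this file. For orientation, here is a sketch of the standard dominator-tree LCA walk it presumably performs, using the _idom and _dom_depth fields this file already relies on; treating a NULL argument as "no constraint yet" matches the NULL-initialized LCA in schedule_late:

// Hypothetical sketch of Block::dom_lca (defined in block.cpp, not shown
// here): least common ancestor of 'this' and LCA in the dominator tree.
Block* Block::dom_lca(Block* LCA) {
  if (LCA == NULL || LCA == this)  return this;
  Block* anc = this;
  // First equalize depths by walking the deeper block up its idom chain,
  while (anc->_dom_depth > LCA->_dom_depth)
    anc = anc->_idom;
  while (LCA->_dom_depth > anc->_dom_depth)
    LCA = LCA->_idom;
  // then climb both chains in lockstep until they meet.
  while (LCA != anc) {
    anc = anc->_idom;
    LCA = LCA->_idom;
  }
  return LCA;
}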
*** 340,351 ****
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index. Return the LCA (in the dom tree)
// of all marked blocks. If there are none marked, return the original
// LCA.
! static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
! Block* early, Block_Array &bbs) {
Block_List worklist;
worklist.push(LCA);
while (worklist.size() > 0) {
Block* mid = worklist.pop();
if (mid == early) continue; // stop searching here
--- 342,352 ----
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index. Return the LCA (in the dom tree)
// of all marked blocks. If there are none marked, return the original
// LCA.
! static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
Block_List worklist;
worklist.push(LCA);
while (worklist.size() > 0) {
Block* mid = worklist.pop();
if (mid == early) continue; // stop searching here
*** 364,374 ****
if (LCA == mid)
continue; // Don't mark as visited to avoid early termination.
} else {
// Keep searching through this block's predecessors.
for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
! Block* mid_parent = bbs[ mid->pred(j)->_idx ];
worklist.push(mid_parent);
}
}
mid->set_raise_LCA_visited(mark);
}
--- 365,375 ----
if (LCA == mid)
continue; // Don't mark as visited to avoid early termination.
} else {
// Keep searching through this block's predecessors.
for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
! Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
worklist.push(mid_parent);
}
}
mid->set_raise_LCA_visited(mark);
}
*** 382,392 ****
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
! static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
Node* base;
Node* index;
Node* store = load->in(MemNode::Memory);
load->as_Mach()->memory_inputs(base, index);
--- 383,393 ----
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
! static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
Node* base;
Node* index;
Node* store = load->in(MemNode::Memory);
load->as_Mach()->memory_inputs(base, index);
*** 410,425 ****
if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (int i = 0; i < mem_inputs_length; i++) {
! Block* inb = bbs[mem_inputs[i]->_idx];
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
! DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
early = deepb;
--- 411,426 ----
if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (int i = 0; i < mem_inputs_length; i++) {
! Block* inb = cfg->get_block_for_node(mem_inputs[i]);
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
! DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
early = deepb;
*** 486,503 ****
// Note the earliest legal placement of 'load', as determined
// by the unique point in the dom tree where all memory effects
// and other inputs are first available. (Computed by schedule_early.)
// For normal loads, 'early' is the shallowest place (dom graph wise)
// to look for anti-deps between this load and any store.
! Block* early = _bbs[load_index];
// If we are subsuming loads, compute an "early" block that only considers
// memory or address inputs. This block may be different than the
// schedule_early block in that it could be at an even shallower depth in the
// dominator tree, and allow for a broader discovery of anti-dependences.
if (C->subsume_loads()) {
! early = memory_early_block(load, early, _bbs);
}
ResourceArea *area = Thread::current()->resource_area();
Node_List worklist_mem(area); // prior memory state to store
Node_List worklist_store(area); // possible-def to explore
--- 487,504 ----
// Note the earliest legal placement of 'load', as determined
// by the unique point in the dom tree where all memory effects
// and other inputs are first available. (Computed by schedule_early.)
// For normal loads, 'early' is the shallowest place (dom graph wise)
// to look for anti-deps between this load and any store.
! Block* early = get_block_for_node(load);
// If we are subsuming loads, compute an "early" block that only considers
// memory or address inputs. This block may be different than the
// schedule_early block in that it could be at an even shallower depth in the
// dominator tree, and allow for a broader discovery of anti-dependences.
if (C->subsume_loads()) {
! early = memory_early_block(load, early, this);
}
ResourceArea *area = Thread::current()->resource_area();
Node_List worklist_mem(area); // prior memory state to store
Node_List worklist_store(area); // possible-def to explore
*** 617,627 ****
// Identify a block that the current load must be above,
// or else observe that 'store' is all the way up in the
// earliest legal block for 'load'. In the latter case,
// immediately insert an anti-dependence edge.
! Block* store_block = _bbs[store->_idx];
assert(store_block != NULL, "unused killing projections skipped above");
if (store->is_Phi()) {
// 'load' uses memory which is one (or more) of the Phi's inputs.
// It must be scheduled not before the Phi, but rather before
--- 618,628 ----
// Identify a block that the current load must be above,
// or else observe that 'store' is all the way up in the
// earliest legal block for 'load'. In the latter case,
// immediately insert an anti-dependence edge.
! Block* store_block = get_block_for_node(store);
assert(store_block != NULL, "unused killing projections skipped above");
if (store->is_Phi()) {
// 'load' uses memory which is one (or more) of the Phi's inputs.
// It must be scheduled not before the Phi, but rather before
*** 635,645 ****
// PhiNode may be at start of block 'early' with backedge to 'early'
DEBUG_ONLY(bool found_match = false);
for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
if (store->in(j) == mem) { // Found matching input?
DEBUG_ONLY(found_match = true);
! Block* pred_block = _bbs[store_block->pred(j)->_idx];
if (pred_block != early) {
// If any predecessor of the Phi matches the load's "early block",
// we do not need a precedence edge between the Phi and 'load'
// since the load will be forced into a block preceding the Phi.
pred_block->set_raise_LCA_mark(load_index);
--- 636,646 ----
// PhiNode may be at start of block 'early' with backedge to 'early'
DEBUG_ONLY(bool found_match = false);
for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
if (store->in(j) == mem) { // Found matching input?
DEBUG_ONLY(found_match = true);
! Block* pred_block = get_block_for_node(store_block->pred(j));
if (pred_block != early) {
// If any predecessor of the Phi matches the load's "early block",
// we do not need a precedence edge between the Phi and 'load'
// since the load will be forced into a block preceding the Phi.
pred_block->set_raise_LCA_mark(load_index);
*** 709,728 ****
//
// The raised LCA will be a lower bound for placing the load,
// preventing the load from sinking past any block containing
// a store that may invalidate the memory state required by 'load'.
if (must_raise_LCA)
! LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
if (LCA == early) return LCA;
// Insert anti-dependence edges from 'load' to each store
// in the non-early LCA block.
// Mine the non_early_stores list for such stores.
if (LCA->raise_LCA_mark() == load_index) {
while (non_early_stores.size() > 0) {
Node* store = non_early_stores.pop();
! Block* store_block = _bbs[store->_idx];
if (store_block == LCA) {
// add anti_dependence from store to load in its own block
assert(store != load->in(0), "dependence cycle found");
if (verify) {
assert(store->find_edge(load) != -1, "missing precedence edge");
--- 710,729 ----
//
// The raised LCA will be a lower bound for placing the load,
// preventing the load from sinking past any block containing
// a store that may invalidate the memory state required by 'load'.
if (must_raise_LCA)
! LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
if (LCA == early) return LCA;
// Insert anti-dependence edges from 'load' to each store
// in the non-early LCA block.
// Mine the non_early_stores list for such stores.
if (LCA->raise_LCA_mark() == load_index) {
while (non_early_stores.size() > 0) {
Node* store = non_early_stores.pop();
! Block* store_block = get_block_for_node(store);
if (store_block == LCA) {
// add anti_dependence from store to load in its own block
assert(store != load->in(0), "dependence cycle found");
if (verify) {
assert(store->find_edge(load) != -1, "missing precedence edge");
*** 752,775 ****
private:
Node_Backward_Iterator();
public:
// Constructor for the iterator
! Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
// Return the next node in the backward traversal
Node *next();
private:
VectorSet &_visited;
Node_List &_stack;
! Block_Array &_bbs;
};
// Constructor for the Node_Backward_Iterator
! Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
! : _visited(visited), _stack(stack), _bbs(bbs) {
// The stack should contain exactly the root
stack.clear();
stack.push(root);
// Clear the visited bits
--- 753,776 ----
private:
Node_Backward_Iterator();
public:
// Constructor for the iterator
! Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);
// Return the next node in the backward traversal
Node *next();
private:
VectorSet &_visited;
Node_List &_stack;
! PhaseCFG &_cfg;
};
// Constructor for the Node_Backward_Iterator
! Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
! : _visited(visited), _stack(stack), _cfg(cfg) {
// The stack should contain exactly the root
stack.clear();
stack.push(root);
// Clear the visited bits
*** 795,806 ****
while( 1 ) {
_visited.set(self->_idx);
// Now schedule all uses as late as possible.
! uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx;
! uint src_rpo = _bbs[src]->_rpo;
// Schedule all nodes in a post-order visit
Node *unvisited = NULL; // Unvisited anti-dependent Node, if any
// Scan for unvisited nodes
--- 796,807 ----
while( 1 ) {
_visited.set(self->_idx);
// Now schedule all uses as late as possible.
! const Node* src = self->is_Proj() ? self->in(0) : self;
! uint src_rpo = _cfg.get_block_for_node(src)->_rpo;
// Schedule all nodes in a post-order visit
Node *unvisited = NULL; // Unvisited anti-dependent Node, if any
// Scan for unvisited nodes
*** 812,822 ****
if ( _visited.test(n->_idx) )
continue;
// do not traverse backward control edges
Node *use = n->is_Proj() ? n->in(0) : n;
! uint use_rpo = _bbs[use->_idx]->_rpo;
if ( use_rpo < src_rpo )
continue;
// Phi nodes always precede uses in a basic block
--- 813,823 ----
if ( _visited.test(n->_idx) )
continue;
// do not traverse backward control edges
Node *use = n->is_Proj() ? n->in(0) : n;
! uint use_rpo = _cfg.get_block_for_node(use)->_rpo;
if ( use_rpo < src_rpo )
continue;
// Phi nodes always precede uses in a basic block
*** 850,860 ****
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif
! Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
Node *n;
// Walk over all the nodes from last to first
while (n = iter.next()) {
// Set the latency for the definitions of this instruction
--- 851,861 ----
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif
! Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
Node *n;
// Walk over all the nodes from last to first
while (n = iter.next()) {
// Set the latency for the definitions of this instruction
*** 881,891 ****
if (n->is_Root())
return;
uint nlen = n->len();
uint use_latency = _node_latency->at_grow(n->_idx);
! uint use_pre_order = _bbs[n->_idx]->_pre_order;
for ( uint j=0; j<nlen; j++ ) {
Node *def = n->in(j);
if (!def || def == n)
--- 882,892 ----
if (n->is_Root())
return;
uint nlen = n->len();
uint use_latency = _node_latency->at_grow(n->_idx);
! uint use_pre_order = get_block_for_node(n)->_pre_order;
for ( uint j=0; j<nlen; j++ ) {
Node *def = n->in(j);
if (!def || def == n)
*** 901,911 ****
def->dump();
}
#endif
// If the defining block is not known, assume it is ok
! Block *def_block = _bbs[def->_idx];
uint def_pre_order = def_block ? def_block->_pre_order : 0;
if ( (use_pre_order < def_pre_order) ||
(use_pre_order == def_pre_order && n->is_Phi()) )
continue;
--- 902,912 ----
def->dump();
}
#endif
// If the defining block is not known, assume it is ok
! Block *def_block = get_block_for_node(def);
uint def_pre_order = def_block ? def_block->_pre_order : 0;
if ( (use_pre_order < def_pre_order) ||
(use_pre_order == def_pre_order && n->is_Phi()) )
continue;
*** 929,942 ****
//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
// If self-reference, return no latency
! if (use == n || use->is_Root())
return 0;
! uint def_pre_order = _bbs[def->_idx]->_pre_order;
uint latency = 0;
// If the use is not a projection, then it is simple...
if (!use->is_Proj()) {
#ifndef PRODUCT
--- 930,944 ----
//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
// If self-reference, return no latency
! if (use == n || use->is_Root()) {
return 0;
+ }
! uint def_pre_order = get_block_for_node(def)->_pre_order;
uint latency = 0;
// If the use is not a projection, then it is simple...
if (!use->is_Proj()) {
#ifndef PRODUCT
*** 944,954 ****
tty->print("# out(): ");
use->dump();
}
#endif
! uint use_pre_order = _bbs[use->_idx]->_pre_order;
if (use_pre_order < def_pre_order)
return 0;
if (use_pre_order == def_pre_order && use->is_Phi())
--- 946,956 ----
tty->print("# out(): ");
use->dump();
}
#endif
! uint use_pre_order = get_block_for_node(use)->_pre_order;
if (use_pre_order < def_pre_order)
return 0;
if (use_pre_order == def_pre_order && use->is_Phi())
*** 1016,1026 ****
double least_freq = least->_freq;
uint target = _node_latency->at_grow(self->_idx);
uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
bool in_latency = (target <= start_latency);
! const Block* root_block = _bbs[_root->_idx];
// Turn off latency scheduling if scheduling is just plain off
if (!C->do_scheduling())
in_latency = true;
--- 1018,1028 ----
double least_freq = least->_freq;
uint target = _node_latency->at_grow(self->_idx);
uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
bool in_latency = (target <= start_latency);
! const Block* root_block = get_block_for_node(_root);
// Turn off latency scheduling if scheduling is just plain off
if (!C->do_scheduling())
in_latency = true;
*** 1124,1139 ****
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- schedule_late ----\n");
#endif
! Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
Node *self;
// Walk over all the nodes from last to first
while (self = iter.next()) {
! Block* early = _bbs[self->_idx]; // Earliest legal placement
if (self->is_top()) {
// Top node goes in bb #2 with other constants.
// It must be special-cased, because it has no out edges.
early->add_inst(self);
--- 1126,1141 ----
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- schedule_late ----\n");
#endif
! Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
Node *self;
// Walk over all the nodes from last to first
while (self = iter.next()) {
! Block* early = get_block_for_node(self); // Earliest legal placement
if (self->is_top()) {
// Top node goes in bb #2 with other constants.
// It must be special-cased, because it has no out edges.
early->add_inst(self);
*** 1177,1195 ****
Block *LCA = NULL;
{
for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
// For all uses, find LCA
Node* use = self->fast_out(i);
! LCA = raise_LCA_above_use(LCA, use, self, _bbs);
}
} // (Hide defs of imax, i from rest of block.)
// Place temps in the block of their use. This isn't a
// requirement for correctness but it reduces useless
// interference between temps and other nodes.
if (mach != NULL && mach->is_MachTemp()) {
! _bbs.map(self->_idx, LCA);
LCA->add_inst(self);
continue;
}
// Check if 'self' could be anti-dependent on memory
--- 1179,1197 ----
Block *LCA = NULL;
{
for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
// For all uses, find LCA
Node* use = self->fast_out(i);
! LCA = raise_LCA_above_use(LCA, use, self, this);
}
} // (Hide defs of imax, i from rest of block.)
// Place temps in the block of their use. This isn't a
// requirement for correctness but it reduces useless
// interference between temps and other nodes.
if (mach != NULL && mach->is_MachTemp()) {
! map_node_to_block(self, LCA);
LCA->add_inst(self);
continue;
}
// Check if 'self' could be anti-dependent on memory
*** 1260,1273 ****
if (trace_opto_pipelining()) {
tty->print("\n---- Start GlobalCodeMotion ----\n");
}
#endif
! // Initialize the bbs.map for things on the proj_list
! uint i;
! for( i=0; i < proj_list.size(); i++ )
! _bbs.map(proj_list[i]->_idx, NULL);
// Set the basic block for Nodes pinned into blocks
Arena *a = Thread::current()->resource_area();
VectorSet visited(a);
schedule_pinned_nodes( visited );
--- 1262,1275 ----
if (trace_opto_pipelining()) {
tty->print("\n---- Start GlobalCodeMotion ----\n");
}
#endif
! // Initialize the node to block mapping for things on the proj_list
! for (uint i = 0; i < proj_list.size(); i++) {
! unmap_node_from_block(proj_list[i]);
! }
// Set the basic block for Nodes pinned into blocks
Arena *a = Thread::current()->resource_area();
VectorSet visited(a);
schedule_pinned_nodes( visited );
*** 1331,1341 ****
// Feel free to revert to a forward loop for clarity.
// for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
Node *proj = matcher._null_check_tests[i ];
Node *val = matcher._null_check_tests[i+1];
! _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
// The implicit_null_check will only perform the transformation
// if the null branch is truly uncommon, *and* it leads to an
// uncommon trap. Combined with the too_many_traps guards
// above, this prevents SEGV storms reported in 6366351,
// by recompiling offending methods without this optimization.
--- 1333,1343 ----
// Feel free to revert to a forward loop for clarity.
// for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
Node *proj = matcher._null_check_tests[i ];
Node *val = matcher._null_check_tests[i+1];
! get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
// The implicit_null_check will only perform the transformation
// if the null branch is truly uncommon, *and* it leads to an
// uncommon trap. Combined with the too_many_traps guards
// above, this prevents SEGV storms reported in 6366351,
// by recompiling offending methods without this optimization.
*** 1351,1373 ****
// Schedule locally. Right now a simple topological sort.
// Later, do a real latency aware scheduler.
uint max_idx = C->unique();
GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
visited.Clear();
! for (i = 0; i < _num_blocks; i++) {
if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
C->record_method_not_compilable("local schedule failed");
}
return;
}
}
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
! for( i=0; i < _num_blocks; i++ )
! _blocks[i]->call_catch_cleanup(_bbs, C);
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- After GlobalCodeMotion ----\n");
for (uint i = 0; i < _num_blocks; i++) {
--- 1353,1376 ----
// Schedule locally. Right now a simple topological sort.
// Later, do a real latency aware scheduler.
uint max_idx = C->unique();
GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
visited.Clear();
! for (uint i = 0; i < _num_blocks; i++) {
if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
C->record_method_not_compilable("local schedule failed");
}
return;
}
}
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
! for (uint i = 0; i < _num_blocks; i++) {
! _blocks[i]->call_catch_cleanup(this, C);
! }
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- After GlobalCodeMotion ----\n");
for (uint i = 0; i < _num_blocks; i++) {
*** 1390,1409 ****
// there once.
if (C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = _blocks[0];
for (uint i = 1; i < root_blk->num_preds(); i++) {
! Block *pb = _bbs[root_blk->pred(i)->_idx];
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
if (uct == _broot) continue;
for (uint i = 1; i < uct->num_preds(); i++) {
! Block *pb = _bbs[uct->pred(i)->_idx];
if (pb->_num_succs == 1) {
worklist.push(pb);
} else if (pb->num_fall_throughs() == 2) {
pb->update_uncommon_branch(uct);
}
--- 1393,1412 ----
// there once.
if (C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = _blocks[0];
for (uint i = 1; i < root_blk->num_preds(); i++) {
! Block *pb = get_block_for_node(root_blk->pred(i));
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
if (uct == _broot) continue;
for (uint i = 1; i < uct->num_preds(); i++) {
! Block *pb = get_block_for_node(uct->pred(i));
if (pb->_num_succs == 1) {
worklist.push(pb);
} else if (pb->num_fall_throughs() == 2) {
pb->update_uncommon_branch(uct);
}
*** 1428,1447 ****
// force paths ending at uncommon traps to be infrequent
if (!C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = _blocks[0];
for (uint i = 1; i < root_blk->num_preds(); i++) {
! Block *pb = _bbs[root_blk->pred(i)->_idx];
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
uct->_freq = PROB_MIN;
for (uint i = 1; i < uct->num_preds(); i++) {
! Block *pb = _bbs[uct->pred(i)->_idx];
if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
worklist.push(pb);
}
}
}
--- 1431,1450 ----
// force paths ending at uncommon traps to be infrequent
if (!C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = _blocks[0];
for (uint i = 1; i < root_blk->num_preds(); i++) {
! Block *pb = get_block_for_node(root_blk->pred(i));
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
uct->_freq = PROB_MIN;
for (uint i = 1; i < uct->num_preds(); i++) {
! Block *pb = get_block_for_node(uct->pred(i));
if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
worklist.push(pb);
}
}
}
*** 1497,1507 ****
if (b->head()->is_Loop()) {
Block* loop_head = b;
assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
! Block* tail = _bbs[tail_n->_idx];
// Defensively filter out Loop nodes for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (i <= tail->_rpo) {
--- 1500,1510 ----
if (b->head()->is_Loop()) {
Block* loop_head = b;
assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
! Block* tail = get_block_for_node(tail_n);
// Defensively filter out Loop nodes for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (i <= tail->_rpo) {
*** 1512,1528 ****
CFGLoop* nloop = new CFGLoop(idct++);
assert(loop_head->_loop == NULL, "just checking");
loop_head->_loop = nloop;
// Add to nloop so push_pred() will skip over inner loops
nloop->add_member(loop_head);
! nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
while (worklist.size() > 0) {
Block* member = worklist.pop();
if (member != loop_head) {
for (uint j = 1; j < member->num_preds(); j++) {
! nloop->push_pred(member, j, worklist, _bbs);
}
}
}
}
}
--- 1515,1531 ----
CFGLoop* nloop = new CFGLoop(idct++);
assert(loop_head->_loop == NULL, "just checking");
loop_head->_loop = nloop;
// Add to nloop so push_pred() will skip over inner loops
nloop->add_member(loop_head);
! nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);
while (worklist.size() > 0) {
Block* member = worklist.pop();
if (member != loop_head) {
for (uint j = 1; j < member->num_preds(); j++) {
! nloop->push_pred(member, j, worklist, this);
}
}
}
}
}
*** 1555,1567 ****
return root_loop;
}
//------------------------------push_pred--------------------------------------
! void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
Node* pred_n = blk->pred(i);
! Block* pred = node_to_blk[pred_n->_idx];
CFGLoop *pred_loop = pred->_loop;
if (pred_loop == NULL) {
// Filter out blocks for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (pred->_rpo > head()->_rpo) {
--- 1558,1570 ----
return root_loop;
}
//------------------------------push_pred--------------------------------------
! void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
Node* pred_n = blk->pred(i);
! Block* pred = cfg->get_block_for_node(pred_n);
CFGLoop *pred_loop = pred->_loop;
if (pred_loop == NULL) {
// Filter out blocks for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (pred->_rpo > head()->_rpo) {
*** 1578,1588 ****
add_nested_loop(pred_loop);
// Continue with loop entry predecessor.
Block* pred_head = pred_loop->head();
assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
assert(pred_head != head(), "loop head in only one loop");
! push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
} else {
assert(pred_loop->_parent == this && _parent == NULL, "just checking");
}
}
}
--- 1581,1591 ----
add_nested_loop(pred_loop);
// Continue with loop entry predecessor.
Block* pred_head = pred_loop->head();
assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
assert(pred_head != head(), "loop head in only one loop");
! push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
} else {
assert(pred_loop->_parent == this && _parent == NULL, "just checking");
}
}
}