src/share/vm/opto/gcm.cpp (Sdiff for 6973963)

Old file:

 824 #endif
 825 
 826   Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
 827   Node *n;
 828 
 829   // Walk over all the nodes from last to first
 830   while (n = iter.next()) {
 831     // Set the latency for the definitions of this instruction
 832     partial_latency_of_defs(n);
 833   }
 834 } // end ComputeLatenciesBackwards
 835 
 836 //------------------------------partial_latency_of_defs------------------------
 837 // Compute the latency impact of this node on all defs.  This computes
 838 // a number that increases as we approach the beginning of the routine.
 839 void PhaseCFG::partial_latency_of_defs(Node *n) {
 840   // Set the latency for this instruction
 841 #ifndef PRODUCT
 842   if (trace_opto_pipelining()) {
 843     tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
 844                n->_idx, _node_latency.at_grow(n->_idx));
 845     dump();
 846   }
 847 #endif
 848 
 849   if (n->is_Proj())
 850     n = n->in(0);
 851 
 852   if (n->is_Root())
 853     return;
 854 
 855   uint nlen = n->len();
 856   uint use_latency = _node_latency.at_grow(n->_idx);
 857   uint use_pre_order = _bbs[n->_idx]->_pre_order;
 858 
 859   for ( uint j=0; j<nlen; j++ ) {
 860     Node *def = n->in(j);
 861 
 862     if (!def || def == n)
 863       continue;
 864 
 865     // Walk backwards thru projections
 866     if (def->is_Proj())
 867       def = def->in(0);
 868 
 869 #ifndef PRODUCT
 870     if (trace_opto_pipelining()) {
 871       tty->print("#    in(%2d): ", j);
 872       def->dump();
 873     }
 874 #endif
 875 
 876     // If the defining block is not known, assume it is ok
 877     Block *def_block = _bbs[def->_idx];
 878     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 879 
 880     if ( (use_pre_order <  def_pre_order) ||
 881          (use_pre_order == def_pre_order && n->is_Phi()) )
 882       continue;
 883 
 884     uint delta_latency = n->latency(j);
 885     uint current_latency = delta_latency + use_latency;
 886 
 887     if (_node_latency.at_grow(def->_idx) < current_latency) {
 888       _node_latency.at_put_grow(def->_idx, current_latency);
 889     }
 890 
 891 #ifndef PRODUCT
 892     if (trace_opto_pipelining()) {
 893       tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
 894                     use_latency, j, delta_latency, current_latency, def->_idx,
 895                     _node_latency.at_grow(def->_idx));
 896     }
 897 #endif
 898   }
 899 }
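partial_latency_of_defs is the backward relaxation step of the latency pass: a node's own latency (its distance to the end of the routine) plus the per-edge cost is pushed into each input, keeping the maximum seen so far. A minimal standalone sketch of just that step, with invented names and a plain std::vector standing in for the VM's GrowableArray:

  #include <algorithm>
  #include <vector>

  // latency[i] = distance in cycles from node i to the end of the routine.
  // A def feeding node n must be ready edge_lat cycles before n issues, so its
  // latency is raised to at least latency[n] + edge_lat (max over all its uses).
  void relax_defs(std::vector<unsigned>& latency,
                  const std::vector<unsigned>& defs_of_n,
                  unsigned n, unsigned edge_lat) {
    for (unsigned d : defs_of_n)
      latency[d] = std::max(latency[d], latency[n] + edge_lat);
  }

The real routine additionally walks through Proj nodes on both ends, skips self-inputs and Phis in the same block, and reads the per-input cost from n->latency(j).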
 900 
 901 //------------------------------latency_from_use-------------------------------
 902 // Compute the latency of a specific use
 903 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
 904   // If self-reference, return no latency
 905   if (use == n || use->is_Root())
 906     return 0;
 907 
 908   uint def_pre_order = _bbs[def->_idx]->_pre_order;
 909   uint latency = 0;
 910 
 911   // If the use is not a projection, then it is simple...
 912   if (!use->is_Proj()) {
 913 #ifndef PRODUCT
 914     if (trace_opto_pipelining()) {
 915       tty->print("#    out(): ");
 916       use->dump();
 917     }
 918 #endif
 919 
 920     uint use_pre_order = _bbs[use->_idx]->_pre_order;
 921 
 922     if (use_pre_order < def_pre_order)
 923       return 0;
 924 
 925     if (use_pre_order == def_pre_order && use->is_Phi())
 926       return 0;
 927 
 928     uint nlen = use->len();
 929     uint nl = _node_latency.at_grow(use->_idx);
 930 
 931     for ( uint j=0; j<nlen; j++ ) {
 932       if (use->in(j) == n) {
 933         // Change this if we want local latencies
 934         uint ul = use->latency(j);
 935         uint  l = ul + nl;
 936         if (latency < l) latency = l;
 937 #ifndef PRODUCT
 938         if (trace_opto_pipelining()) {
 939           tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, latency = %d",
 940                         nl, j, ul, l, latency);
 941         }
 942 #endif
 943       }
 944     }
 945   } else {
 946     // This is a projection, just grab the latency of the use(s)
 947     for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
 948       uint l = latency_from_use(use, def, use->fast_out(j));
 949       if (latency < l) latency = l;
 950     }
 951   }
 952 
 953   return latency;
 954 }
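Concretely: if def feeds an input slot of use whose node_latency is 7 and the edge latency for that slot is 3, latency_from_use reports 10. When the immediate use is a Proj, the projection itself contributes nothing; the routine recurses into the projection's own outputs and returns the largest value found among them.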
 955 
 956 //------------------------------latency_from_uses------------------------------
 957 // Compute the latency of this instruction relative to all of its uses.
 958 // This computes a number that increases as we approach the beginning of the
 959 // routine.
 960 void PhaseCFG::latency_from_uses(Node *n) {
 961   // Set the latency for this instruction
 962 #ifndef PRODUCT
 963   if (trace_opto_pipelining()) {
 964     tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
 965                n->_idx, _node_latency.at_grow(n->_idx));
 966     dump();
 967   }
 968 #endif
 969   uint latency=0;
 970   const Node *def = n->is_Proj() ? n->in(0): n;
 971 
 972   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 973     uint l = latency_from_use(n, def, n->fast_out(i));
 974 
 975     if (latency < l) latency = l;
 976   }
 977 
 978   _node_latency.at_put_grow(n->_idx, latency);
 979 }
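Taken together, latency_from_uses and partial_latency_of_defs maintain the same recurrence from opposite directions: node_latency[def] = max over uses of (node_latency[use] + edge_latency at the consuming input), with self-references, Root, and Phis in the defining block contributing zero so that back edges do not feed the recurrence.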
 980 
 981 //------------------------------hoist_to_cheaper_block-------------------------
 982 // Pick a block for node self, between early and LCA, that is a cheaper
 983 // alternative to LCA.
 984 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
 985   const double delta = 1+PROB_UNLIKELY_MAG(4);
 986   Block* least       = LCA;
 987   double least_freq  = least->_freq;
 988   uint target        = _node_latency.at_grow(self->_idx);
 989   uint start_latency = _node_latency.at_grow(LCA->_nodes[0]->_idx);
 990   uint end_latency   = _node_latency.at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
 991   bool in_latency    = (target <= start_latency);
 992   const Block* root_block = _bbs[_root->_idx];
 993 
 994   // Turn off latency scheduling if scheduling is just plain off
 995   if (!C->do_scheduling())
 996     in_latency = true;
 997 
 998   // Do not hoist (to cover latency) instructions which target a
 999   // single register.  Hoisting stretches the live range of the
1000   // single register and may force spilling.
1001   MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1002   if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
1003     in_latency = true;
1004 
1005 #ifndef PRODUCT
1006   if (trace_opto_pipelining()) {
1007     tty->print("# Find cheaper block for latency %d: ",
1008       _node_latency.at_grow(self->_idx));
1009     self->dump();
1010     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1011       LCA->_pre_order,
1012       LCA->_nodes[0]->_idx,
1013       start_latency,
1014       LCA->_nodes[LCA->end_idx()]->_idx,
1015       end_latency,
1016       least_freq);
1017   }
1018 #endif
1019 
1020   // Walk up the dominator tree from LCA (Lowest common ancestor) to
1021   // the earliest legal location.  Capture the least execution frequency.
1022   while (LCA != early) {
1023     LCA = LCA->_idom;         // Follow up the dominator tree
1024 
1025     if (LCA == NULL) {
1026       // Bailout without retry
1027       C->record_method_not_compilable("late schedule failed: LCA == NULL");
1028       return least;
1029     }
1030 
1031     // Don't hoist machine instructions to the root basic block
1032     if (mach && LCA == root_block)
1033       break;
1034 
1035     uint start_lat = _node_latency.at_grow(LCA->_nodes[0]->_idx);
1036     uint end_idx   = LCA->end_idx();
1037     uint end_lat   = _node_latency.at_grow(LCA->_nodes[end_idx]->_idx);
1038     double LCA_freq = LCA->_freq;
1039 #ifndef PRODUCT
1040     if (trace_opto_pipelining()) {
1041       tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1042         LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
1043     }
1044 #endif
1045     if (LCA_freq < least_freq              || // Better Frequency
1046         ( !in_latency                   &&    // No block containing latency
1047           LCA_freq < least_freq * delta &&    // No worse frequency
1048           target >= end_lat             &&    // within latency range
1049           !self->is_iteratively_computed() )  // But don't hoist IV increments
1050              // because they may end up above other uses of their phi forcing
1051              // their result register to be different from their input.
1052        ) {
1053       least = LCA;            // Found cheaper block
1054       least_freq = LCA_freq;
1055       start_latency = start_lat;
1056       end_latency = end_lat;
1057       if (target <= start_lat)
1058         in_latency = true;
1059     }
1060   }
1061 
1062 #ifndef PRODUCT
1063   if (trace_opto_pipelining()) {
1064     tty->print_cr("#  Choose block B%d with start latency=%d and freq=%g",
1065       least->_pre_order, start_latency, least_freq);
1066   }
1067 #endif
1068 
1069   // See if the latency needs to be updated
1070   if (target < end_latency) {
1071 #ifndef PRODUCT
1072     if (trace_opto_pipelining()) {
1073       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
1074     }
1075 #endif
1076     _node_latency.at_put_grow(self->_idx, end_latency);
1077     partial_latency_of_defs(self);
1078   }
1079 
1080   return least;
1081 }
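The acceptance test in the hoisting loop above is easier to read pulled out on its own. A hedged paraphrase (invented names, not code from this file):

  // cand_* describe the dominator block being considered; least_freq is the
  // cheapest frequency accepted so far; delta is a factor just above 1.0.
  bool accept_block(double cand_freq, double least_freq, double delta,
                    bool in_latency, unsigned target, unsigned cand_end_lat,
                    bool iteratively_computed) {
    if (cand_freq < least_freq) return true;   // strictly cheaper block always wins
    return !in_latency                         // latency not yet covered at current pick
        && cand_freq < least_freq * delta      // frequency not meaningfully worse
        && target >= cand_end_lat              // node still fits the block's latency range
        && !iteratively_computed;              // never hoist IV-style increments
  }

So a strictly cheaper block is always taken, while an essentially equally cheap one is taken only when it can still help hide latency and the node is not an iteratively computed value whose phi placement would be disturbed.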
1082 
1083 
1084 //------------------------------schedule_late-----------------------------------
1085 // Now schedule all codes as LATE as possible.  This is the LCA in the
1086 // dominator tree of all USES of a value.  Pick the block with the least
1087 // loop nesting depth that is lowest in the dominator tree.
1088 extern const char must_clone[];
1089 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
1090 #ifndef PRODUCT
1091   if (trace_opto_pipelining())
1092     tty->print("\n#---- schedule_late ----\n");
1093 #endif
1094 
1095   Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
1096   Node *self;


1238   schedule_pinned_nodes( visited );
1239 
1240   // Find the earliest Block any instruction can be placed in.  Some
1241   // instructions are pinned into Blocks.  Unpinned instructions can
 1242   // appear in the last block in which all their inputs occur.
1243   visited.Clear();
1244   Node_List stack(a);
1245   stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
1246   if (!schedule_early(visited, stack)) {
1247     // Bailout without retry
1248     C->record_method_not_compilable("early schedule failed");
1249     return;
1250   }
1251 
1252   // Build Def-Use edges.
1253   proj_list.push(_root);        // Add real root as another root
1254   proj_list.pop();
1255 
1256   // Compute the latency information (via backwards walk) for all the
1257   // instructions in the graph
1258   GrowableArray<uint> node_latency;
1259   _node_latency = node_latency;
1260 
1261   if( C->do_scheduling() )
1262     ComputeLatenciesBackwards(visited, stack);
1263 
1264   // Now schedule all codes as LATE as possible.  This is the LCA in the
1265   // dominator tree of all USES of a value.  Pick the block with the least
1266   // loop nesting depth that is lowest in the dominator tree.
1267   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
1268   schedule_late(visited, stack);
1269   if( C->failing() ) {
1270     // schedule_late fails only when graph is incorrect.
1271     assert(!VerifyGraphEdges, "verification should have failed");
1272     return;
1273   }
1274 
1275   unique = C->unique();
1276 
1277 #ifndef PRODUCT
1278   if (trace_opto_pipelining()) {
1279     tty->print("\n---- Detect implicit null checks ----\n");


1324       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
1325         C->record_method_not_compilable("local schedule failed");
1326       }
1327       return;
1328     }
1329   }
1330 
 1331   // If we inserted any instructions between a Call and its CatchNode,
1332   // clone the instructions on all paths below the Catch.
1333   for( i=0; i < _num_blocks; i++ )
1334     _blocks[i]->call_catch_cleanup(_bbs);
1335 
1336 #ifndef PRODUCT
1337   if (trace_opto_pipelining()) {
1338     tty->print("\n---- After GlobalCodeMotion ----\n");
1339     for (uint i = 0; i < _num_blocks; i++) {
1340       _blocks[i]->dump();
1341     }
1342   }
1343 #endif


1344 }
1345 
1346 
1347 //------------------------------Estimate_Block_Frequency-----------------------
1348 // Estimate block frequencies based on IfNode probabilities.
1349 void PhaseCFG::Estimate_Block_Frequency() {
1350 
1351   // Force conditional branches leading to uncommon traps to be unlikely,
1352   // not because we get to the uncommon_trap with less relative frequency,
1353   // but because an uncommon_trap typically causes a deopt, so we only get
1354   // there once.
1355   if (C->do_freq_based_layout()) {
1356     Block_List worklist;
1357     Block* root_blk = _blocks[0];
1358     for (uint i = 1; i < root_blk->num_preds(); i++) {
1359       Block *pb = _bbs[root_blk->pred(i)->_idx];
1360       if (pb->has_uncommon_code()) {
1361         worklist.push(pb);
1362       }
 1363     }


New file:

 824 #endif
 825 
 826   Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
 827   Node *n;
 828 
 829   // Walk over all the nodes from last to first
 830   while (n = iter.next()) {
 831     // Set the latency for the definitions of this instruction
 832     partial_latency_of_defs(n);
 833   }
 834 } // end ComputeLatenciesBackwards
 835 
 836 //------------------------------partial_latency_of_defs------------------------
 837 // Compute the latency impact of this node on all defs.  This computes
 838 // a number that increases as we approach the beginning of the routine.
 839 void PhaseCFG::partial_latency_of_defs(Node *n) {
 840   // Set the latency for this instruction
 841 #ifndef PRODUCT
 842   if (trace_opto_pipelining()) {
 843     tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
 844                n->_idx, _node_latency->at_grow(n->_idx));
 845     dump();
 846   }
 847 #endif
 848 
 849   if (n->is_Proj())
 850     n = n->in(0);
 851 
 852   if (n->is_Root())
 853     return;
 854 
 855   uint nlen = n->len();
 856   uint use_latency = _node_latency->at_grow(n->_idx);
 857   uint use_pre_order = _bbs[n->_idx]->_pre_order;
 858 
 859   for ( uint j=0; j<nlen; j++ ) {
 860     Node *def = n->in(j);
 861 
 862     if (!def || def == n)
 863       continue;
 864 
 865     // Walk backwards thru projections
 866     if (def->is_Proj())
 867       def = def->in(0);
 868 
 869 #ifndef PRODUCT
 870     if (trace_opto_pipelining()) {
 871       tty->print("#    in(%2d): ", j);
 872       def->dump();
 873     }
 874 #endif
 875 
 876     // If the defining block is not known, assume it is ok
 877     Block *def_block = _bbs[def->_idx];
 878     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 879 
 880     if ( (use_pre_order <  def_pre_order) ||
 881          (use_pre_order == def_pre_order && n->is_Phi()) )
 882       continue;
 883 
 884     uint delta_latency = n->latency(j);
 885     uint current_latency = delta_latency + use_latency;
 886 
 887     if (_node_latency->at_grow(def->_idx) < current_latency) {
 888       _node_latency->at_put_grow(def->_idx, current_latency);
 889     }
 890 
 891 #ifndef PRODUCT
 892     if (trace_opto_pipelining()) {
 893       tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
 894                     use_latency, j, delta_latency, current_latency, def->_idx,
 895                     _node_latency->at_grow(def->_idx));
 896     }
 897 #endif
 898   }
 899 }
 900 
 901 //------------------------------latency_from_use-------------------------------
 902 // Compute the latency of a specific use
 903 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
 904   // If self-reference, return no latency
 905   if (use == n || use->is_Root())
 906     return 0;
 907 
 908   uint def_pre_order = _bbs[def->_idx]->_pre_order;
 909   uint latency = 0;
 910 
 911   // If the use is not a projection, then it is simple...
 912   if (!use->is_Proj()) {
 913 #ifndef PRODUCT
 914     if (trace_opto_pipelining()) {
 915       tty->print("#    out(): ");
 916       use->dump();
 917     }
 918 #endif
 919 
 920     uint use_pre_order = _bbs[use->_idx]->_pre_order;
 921 
 922     if (use_pre_order < def_pre_order)
 923       return 0;
 924 
 925     if (use_pre_order == def_pre_order && use->is_Phi())
 926       return 0;
 927 
 928     uint nlen = use->len();
 929     uint nl = _node_latency->at_grow(use->_idx);
 930 
 931     for ( uint j=0; j<nlen; j++ ) {
 932       if (use->in(j) == n) {
 933         // Change this if we want local latencies
 934         uint ul = use->latency(j);
 935         uint  l = ul + nl;
 936         if (latency < l) latency = l;
 937 #ifndef PRODUCT
 938         if (trace_opto_pipelining()) {
 939           tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, latency = %d",
 940                         nl, j, ul, l, latency);
 941         }
 942 #endif
 943       }
 944     }
 945   } else {
 946     // This is a projection, just grab the latency of the use(s)
 947     for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
 948       uint l = latency_from_use(use, def, use->fast_out(j));
 949       if (latency < l) latency = l;
 950     }
 951   }
 952 
 953   return latency;
 954 }
 955 
 956 //------------------------------latency_from_uses------------------------------
 957 // Compute the latency of this instruction relative to all of its uses.
 958 // This computes a number that increases as we approach the beginning of the
 959 // routine.
 960 void PhaseCFG::latency_from_uses(Node *n) {
 961   // Set the latency for this instruction
 962 #ifndef PRODUCT
 963   if (trace_opto_pipelining()) {
 964     tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
 965                n->_idx, _node_latency->at_grow(n->_idx));
 966     dump();
 967   }
 968 #endif
 969   uint latency=0;
 970   const Node *def = n->is_Proj() ? n->in(0): n;
 971 
 972   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 973     uint l = latency_from_use(n, def, n->fast_out(i));
 974 
 975     if (latency < l) latency = l;
 976   }
 977 
 978   _node_latency->at_put_grow(n->_idx, latency);
 979 }
 980 
 981 //------------------------------hoist_to_cheaper_block-------------------------
 982 // Pick a block for node self, between early and LCA, that is a cheaper
 983 // alternative to LCA.
 984 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
 985   const double delta = 1+PROB_UNLIKELY_MAG(4);
 986   Block* least       = LCA;
 987   double least_freq  = least->_freq;
 988   uint target        = _node_latency->at_grow(self->_idx);
 989   uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
 990   uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
 991   bool in_latency    = (target <= start_latency);
 992   const Block* root_block = _bbs[_root->_idx];
 993 
 994   // Turn off latency scheduling if scheduling is just plain off
 995   if (!C->do_scheduling())
 996     in_latency = true;
 997 
 998   // Do not hoist (to cover latency) instructions which target a
 999   // single register.  Hoisting stretches the live range of the
1000   // single register and may force spilling.
1001   MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1002   if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
1003     in_latency = true;
1004 
1005 #ifndef PRODUCT
1006   if (trace_opto_pipelining()) {
1007     tty->print("# Find cheaper block for latency %d: ",
1008       _node_latency->at_grow(self->_idx));
1009     self->dump();
1010     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1011       LCA->_pre_order,
1012       LCA->_nodes[0]->_idx,
1013       start_latency,
1014       LCA->_nodes[LCA->end_idx()]->_idx,
1015       end_latency,
1016       least_freq);
1017   }
1018 #endif
1019 
1020   // Walk up the dominator tree from LCA (Lowest common ancestor) to
1021   // the earliest legal location.  Capture the least execution frequency.
1022   while (LCA != early) {
1023     LCA = LCA->_idom;         // Follow up the dominator tree
1024 
1025     if (LCA == NULL) {
1026       // Bailout without retry
1027       C->record_method_not_compilable("late schedule failed: LCA == NULL");
1028       return least;
1029     }
1030 
1031     // Don't hoist machine instructions to the root basic block
1032     if (mach && LCA == root_block)
1033       break;
1034 
1035     uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
1036     uint end_idx   = LCA->end_idx();
1037     uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
1038     double LCA_freq = LCA->_freq;
1039 #ifndef PRODUCT
1040     if (trace_opto_pipelining()) {
1041       tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1042         LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
1043     }
1044 #endif
1045     if (LCA_freq < least_freq              || // Better Frequency
1046         ( !in_latency                   &&    // No block containing latency
1047           LCA_freq < least_freq * delta &&    // No worse frequency
1048           target >= end_lat             &&    // within latency range
1049           !self->is_iteratively_computed() )  // But don't hoist IV increments
1050              // because they may end up above other uses of their phi forcing
1051              // their result register to be different from their input.
1052        ) {
1053       least = LCA;            // Found cheaper block
1054       least_freq = LCA_freq;
1055       start_latency = start_lat;
1056       end_latency = end_lat;
1057       if (target <= start_lat)
1058         in_latency = true;
1059     }
1060   }
1061 
1062 #ifndef PRODUCT
1063   if (trace_opto_pipelining()) {
1064     tty->print_cr("#  Choose block B%d with start latency=%d and freq=%g",
1065       least->_pre_order, start_latency, least_freq);
1066   }
1067 #endif
1068 
1069   // See if the latency needs to be updated
1070   if (target < end_latency) {
1071 #ifndef PRODUCT
1072     if (trace_opto_pipelining()) {
1073       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
1074     }
1075 #endif
1076     _node_latency->at_put_grow(self->_idx, end_latency);
1077     partial_latency_of_defs(self);
1078   }
1079 
1080   return least;
1081 }
1082 
1083 
1084 //------------------------------schedule_late-----------------------------------
1085 // Now schedule all codes as LATE as possible.  This is the LCA in the
1086 // dominator tree of all USES of a value.  Pick the block with the least
1087 // loop nesting depth that is lowest in the dominator tree.
1088 extern const char must_clone[];
1089 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
1090 #ifndef PRODUCT
1091   if (trace_opto_pipelining())
1092     tty->print("\n#---- schedule_late ----\n");
1093 #endif
1094 
1095   Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
1096   Node *self;


1238   schedule_pinned_nodes( visited );
1239 
1240   // Find the earliest Block any instruction can be placed in.  Some
1241   // instructions are pinned into Blocks.  Unpinned instructions can
 1242   // appear in the last block in which all their inputs occur.
1243   visited.Clear();
1244   Node_List stack(a);
1245   stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
1246   if (!schedule_early(visited, stack)) {
1247     // Bailout without retry
1248     C->record_method_not_compilable("early schedule failed");
1249     return;
1250   }
1251 
1252   // Build Def-Use edges.
1253   proj_list.push(_root);        // Add real root as another root
1254   proj_list.pop();
1255 
1256   // Compute the latency information (via backwards walk) for all the
1257   // instructions in the graph
1258   _node_latency = new GrowableArray<uint>(); // resource_area allocation

1259 
1260   if( C->do_scheduling() )
1261     ComputeLatenciesBackwards(visited, stack);
1262 
1263   // Now schedule all codes as LATE as possible.  This is the LCA in the
1264   // dominator tree of all USES of a value.  Pick the block with the least
1265   // loop nesting depth that is lowest in the dominator tree.
1266   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
1267   schedule_late(visited, stack);
1268   if( C->failing() ) {
1269     // schedule_late fails only when graph is incorrect.
1270     assert(!VerifyGraphEdges, "verification should have failed");
1271     return;
1272   }
1273 
1274   unique = C->unique();
1275 
1276 #ifndef PRODUCT
1277   if (trace_opto_pipelining()) {
1278     tty->print("\n---- Detect implicit null checks ----\n");


1323       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
1324         C->record_method_not_compilable("local schedule failed");
1325       }
1326       return;
1327     }
1328   }
1329 
 1330   // If we inserted any instructions between a Call and its CatchNode,
1331   // clone the instructions on all paths below the Catch.
1332   for( i=0; i < _num_blocks; i++ )
1333     _blocks[i]->call_catch_cleanup(_bbs);
1334 
1335 #ifndef PRODUCT
1336   if (trace_opto_pipelining()) {
1337     tty->print("\n---- After GlobalCodeMotion ----\n");
1338     for (uint i = 0; i < _num_blocks; i++) {
1339       _blocks[i]->dump();
1340     }
1341   }
1342 #endif
1343   // Dead.
1344   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
1345 }
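The visible change in this function is the lifetime of _node_latency: the old side keeps it as a by-value GrowableArray member, while the new side allocates it (in a resource area, per the inline comment) at the start of GlobalCodeMotion and poisons the pointer with 0xdeadbeef once scheduling is done, so a stale use fails fast instead of touching reclaimed memory. A minimal sketch of that allocate-use-poison pattern, with std::vector standing in for the VM's GrowableArray and a hypothetical holder class rather than the real PhaseCFG:

  #include <vector>

  struct LatencyScope {
    std::vector<unsigned>* _node_latency;

    void global_code_motion() {
      _node_latency = new std::vector<unsigned>();         // live only for this pass
      // ... latency computation and late scheduling go through *_node_latency ...
      // No delete here: in the VM the array lives in a resource area that is
      // reclaimed wholesale; the poison value just catches accidental reuse.
      _node_latency = (std::vector<unsigned>*)0xdeadbeef;  // Dead.
    }
  };

Note that this sketch leaks under plain new/delete semantics; it only mirrors the resource-area discipline the patch relies on.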
1346 
1347 
1348 //------------------------------Estimate_Block_Frequency-----------------------
1349 // Estimate block frequencies based on IfNode probabilities.
1350 void PhaseCFG::Estimate_Block_Frequency() {
1351 
1352   // Force conditional branches leading to uncommon traps to be unlikely,
1353   // not because we get to the uncommon_trap with less relative frequency,
1354   // but because an uncommon_trap typically causes a deopt, so we only get
1355   // there once.
1356   if (C->do_freq_based_layout()) {
1357     Block_List worklist;
1358     Block* root_blk = _blocks[0];
1359     for (uint i = 1; i < root_blk->num_preds(); i++) {
1360       Block *pb = _bbs[root_blk->pred(i)->_idx];
1361       if (pb->has_uncommon_code()) {
1362         worklist.push(pb);
1363       }
1364     }

