#endif

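  // The backward iterator visits uses before their defs, so a single
  // reverse pass can accumulate worst-case latencies from the end of the
  // routine toward its beginning.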
  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
  Node *n;

  // Walk over all the nodes from last to first
  while ((n = iter.next()) != NULL) {
    // Set the latency for the definitions of this instruction
    partial_latency_of_defs(n);
  }
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs. This computes
// a number that increases as we approach the beginning of the routine.
void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif

  if (n->is_Proj())
    n = n->in(0);

  if (n->is_Root())
    return;

  uint nlen = n->len();
  uint use_latency = _node_latency->at_grow(n->_idx);
  uint use_pre_order = _bbs[n->_idx]->_pre_order;

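  // Each input edge propagates latency backward:
  //   latency(def) = max(latency(def), latency(n) + edge_latency(n, j))
  // so a def's latency ends up covering its most demanding use.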
  for (uint j = 0; j < nlen; j++) {
    Node *def = n->in(j);

    if (!def || def == n)
      continue;

    // Walk backwards through projections
    if (def->is_Proj())
      def = def->in(0);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# in(%2d): ", j);
      def->dump();
    }
#endif

    // If the defining block is not known, assume it is ok
    Block *def_block = _bbs[def->_idx];
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

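    // A def in a later block (a loop back-edge) or a Phi in the same
    // block would feed latency around a cycle, so those edges are skipped.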
    if ((use_pre_order < def_pre_order) ||
        (use_pre_order == def_pre_order && n->is_Phi()))
      continue;

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

    if (_node_latency->at_grow(def->_idx) < current_latency) {
      _node_latency->at_put_grow(def->_idx, current_latency);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
                    use_latency, j, delta_latency, current_latency, def->_idx,
                    _node_latency->at_grow(def->_idx));
    }
#endif
  }
}

//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
  // If self-reference, return no latency
  if (use == n || use->is_Root())
    return 0;

  uint def_pre_order = _bbs[def->_idx]->_pre_order;
  uint latency = 0;

  // If the use is not a projection, then it is simple...
  if (!use->is_Proj()) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# out(): ");
      use->dump();
    }
#endif

    uint use_pre_order = _bbs[use->_idx]->_pre_order;

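    // Same screening as in partial_latency_of_defs: back-edges and
    // same-block Phis contribute no latency.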
    if (use_pre_order < def_pre_order)
      return 0;

    if (use_pre_order == def_pre_order && use->is_Phi())
      return 0;

    uint nlen = use->len();
    uint nl = _node_latency->at_grow(use->_idx);

    for (uint j = 0; j < nlen; j++) {
      if (use->in(j) == n) {
        // Change this if we want local latencies
        uint ul = use->latency(j);
        uint l = ul + nl;
        if (latency < l) latency = l;
#ifndef PRODUCT
        if (trace_opto_pipelining()) {
          tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",
                        nl, j, ul, l, latency);
        }
#endif
      }
    }
  } else {
    // This is a projection, just grab the latency of the use(s)
    for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
      uint l = latency_from_use(use, def, use->fast_out(j));
      if (latency < l) latency = l;
    }
  }

  return latency;
}

//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
void PhaseCFG::latency_from_uses(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif
  uint latency = 0;
  const Node *def = n->is_Proj() ? n->in(0) : n;

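  // A node's latency is the max over all of its uses of (use latency +
  // edge latency); a projection defers to its parent node as the def.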
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    uint l = latency_from_use(n, def, n->fast_out(i));

    if (latency < l) latency = l;
  }

  _node_latency->at_put_grow(n->_idx, latency);
}

//------------------------------hoist_to_cheaper_block-------------------------
// Pick a block for node self, between early and LCA, that is a cheaper
// alternative to LCA.
Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
  const double delta = 1 + PROB_UNLIKELY_MAG(4);
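  // delta is a tolerance just above 1.0 (roughly 1 + 1e-4): a candidate
  // block may still win even if it is marginally more frequent than the
  // best block seen so far.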
  Block* least = LCA;
  double least_freq = least->_freq;
  uint target = _node_latency->at_grow(self->_idx);
  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
  uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
  bool in_latency = (target <= start_latency);
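  // in_latency: self's target latency is already covered by the time the
  // candidate block starts, so hoisting would buy nothing.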
  const Block* root_block = _bbs[_root->_idx];

  // Turn off latency scheduling if scheduling is just plain off
  if (!C->do_scheduling())
    in_latency = true;

  // Do not hoist (to cover latency) instructions which target a
  // single register. Hoisting stretches the live range of the
  // single register and may force spilling.
  MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
  if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
    in_latency = true;

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# Find cheaper block for latency %d: ",
               _node_latency->at_grow(self->_idx));
    self->dump();
    tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                  LCA->_pre_order,
                  LCA->_nodes[0]->_idx,
                  start_latency,
                  LCA->_nodes[LCA->end_idx()]->_idx,
                  end_latency,
                  least_freq);
  }
#endif

  // Walk up the dominator tree from LCA (Lowest common ancestor) to
  // the earliest legal location. Capture the least execution frequency.
  while (LCA != early) {
    LCA = LCA->_idom;         // Follow up the dominator tree

    if (LCA == NULL) {
      // Bailout without retry
      C->record_method_not_compilable("late schedule failed: LCA == NULL");
      return least;
    }

    // Don't hoist machine instructions to the root basic block
    if (mach && LCA == root_block)
      break;

    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
    uint end_idx = LCA->end_idx();
    uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
    double LCA_freq = LCA->_freq;
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                    LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
    }
#endif
    if (LCA_freq < least_freq ||              // Better Frequency
        (!in_latency &&                       // No block containing latency
         LCA_freq < least_freq * delta &&     // No worse frequency
         target >= end_lat &&                 // within latency range
         !self->is_iteratively_computed())    // But don't hoist IV increments
            // because they may end up above other uses of their phi forcing
            // their result register to be different from their input.
       ) {
      least = LCA;            // Found cheaper block
      least_freq = LCA_freq;
      start_latency = start_lat;
      end_latency = end_lat;
      if (target <= start_lat)
        in_latency = true;
    }
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
                  least->_pre_order, start_latency, least_freq);
  }
#endif

  // See if the latency needs to be updated
  if (target < end_latency) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
    }
#endif
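    // Raising self's latency to that of the block-ending instruction can
    // leave the latencies of self's defs stale, so re-propagate them.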
    _node_latency->at_put_grow(self->_idx, end_latency);
    partial_latency_of_defs(self);
  }

  return least;
}


//------------------------------schedule_late-----------------------------------
// Now schedule all codes as LATE as possible. This is the LCA in the
// dominator tree of all USES of a value. Pick the block with the least
// loop nesting depth that is lowest in the dominator tree.
extern const char must_clone[];
void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- schedule_late ----\n");
#endif

  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
  Node *self;

  // ... (the remainder of schedule_late and the start of GlobalCodeMotion
  // are elided in this excerpt) ...

  schedule_pinned_nodes(visited);

  // Find the earliest Block any instruction can be placed in. Some
  // instructions are pinned into Blocks. Unpinned instructions can
  // appear in the last block in which all their inputs occur.
  visited.Clear();
  Node_List stack(a);
  stack.map((unique >> 1) + 16, NULL); // Pre-grow the list
  if (!schedule_early(visited, stack)) {
    // Bailout without retry
    C->record_method_not_compilable("early schedule failed");
    return;
  }

  // Build Def-Use edges.
  proj_list.push(_root);        // Add real root as another root
  proj_list.pop();

  // Compute the latency information (via backwards walk) for all the
  // instructions in the graph
  _node_latency = new GrowableArray<uint>(); // resource_area allocation
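  // The array lives in the compiler's resource area, so it needs no
  // explicit free; the pointer is poisoned at the end of this method once
  // the latency data is dead.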

  if (C->do_scheduling())
    ComputeLatenciesBackwards(visited, stack);

  // Now schedule all codes as LATE as possible. This is the LCA in the
  // dominator tree of all USES of a value. Pick the block with the least
  // loop nesting depth that is lowest in the dominator tree.
  // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
  schedule_late(visited, stack);
  if (C->failing()) {
    // schedule_late fails only when graph is incorrect.
    assert(!VerifyGraphEdges, "verification should have failed");
    return;
  }

  unique = C->unique();

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- Detect implicit null checks ----\n");
  // ... (implicit null check detection and the per-block local scheduling
  // loop are elided in this excerpt; what follows is the tail of the local
  // scheduling failure handling) ...

      if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
        C->record_method_not_compilable("local schedule failed");
      }
      return;
    }
  }

  // If we inserted any instructions between a Call and its CatchNode,
  // clone the instructions on all paths below the Catch.
  for (i = 0; i < _num_blocks; i++)
    _blocks[i]->call_catch_cleanup(_bbs);

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- After GlobalCodeMotion ----\n");
    for (uint i = 0; i < _num_blocks; i++) {
      _blocks[i]->dump();
    }
  }
#endif
  // Dead.
  _node_latency = (GrowableArray<uint> *)0xdeadbeef;
}


//------------------------------Estimate_Block_Frequency-----------------------
// Estimate block frequencies based on IfNode probabilities.
void PhaseCFG::Estimate_Block_Frequency() {

  // Force conditional branches leading to uncommon traps to be unlikely,
  // not because we get to the uncommon_trap with less relative frequency,
  // but because an uncommon_trap typically causes a deopt, so we only get
  // there once.
  if (C->do_freq_based_layout()) {
    Block_List worklist;
    Block* root_blk = _blocks[0];
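    // The root block's predecessors are the method's exit paths; any
    // predecessor containing uncommon-trap code is queued so its incoming
    // branch can be marked unlikely.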
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = _bbs[root_blk->pred(i)->_idx];
      if (pb->has_uncommon_code()) {
        worklist.push(pb);
      }
    }