504 assert(load->needs_anti_dependence_check(), "must be a load of some sort");
505 assert(LCA != NULL, "");
506 DEBUG_ONLY(Block* LCA_orig = LCA);
507
508 // Compute the alias index. Loads and stores with different alias indices
509 // do not need anti-dependence edges.
510 int load_alias_idx = C->get_alias_index(load->adr_type());
511 #ifdef ASSERT
512 if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
513 (PrintOpto || VerifyAliases ||
514 (PrintMiscellaneous && (WizardMode || Verbose)))) {
515 // Load nodes should not consume all of memory.
516 // Reporting a bottom type indicates a bug in adlc.
517 // If some particular type of node validly consumes all of memory,
518 // sharpen the preceding "if" to exclude it, so we can catch bugs here.
519 tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
520 load->dump(2);
521 if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
522 }
523 #endif
524 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
525 "String compare is only known 'load' that does not conflict with any stores");
526 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
527 "String equals is a 'load' that does not conflict with any stores");
528 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
529 "String indexOf is a 'load' that does not conflict with any stores");
530 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOfChar),
531 "String indexOfChar is a 'load' that does not conflict with any stores");
532 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
533 "Arrays equals is a 'load' that does not conflict with any stores");
534 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_HasNegatives),
535 "HasNegatives is a 'load' that does not conflict with any stores");
536
537 if (!C->alias_type(load_alias_idx)->is_rewritable()) {
538 // It is impossible to spoil this load by putting stores before it,
539 // because we know that the stores will never update the value
540 // which 'load' must witness.
541 return LCA;
542 }
543
544 node_idx_t load_index = load->_idx;
545
546 // Note the earliest legal placement of 'load', as determined by
547 // the unique point in the dom tree where all memory effects
548 // and other inputs are first available. (Computed by schedule_early.)
549 // For normal loads, 'early' is the shallowest place (dom graph wise)
550 // to look for anti-deps between this load and any store.
551 Block* early = get_block_for_node(load);
552
553 // If we are subsuming loads, compute an "early" block that only considers
554 // memory or address inputs. This block may be different than the
581 // Each of these stores is a possible definition of memory
582 // that 'load' needs to use. We need to force 'load'
583 // to occur before each such store. When the store is in
584 // the same block as 'load', we insert an anti-dependence
585 // edge load->store.
586
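// --- Editorial sketch (not part of the original source) --------------------
// A minimal model of the rule enforced by the search below: when a possibly-
// aliasing store lands in the same block as 'load', an anti-dependence
// (precedence) edge load->store pins the load before the store in the local
// schedule. 'ToyNode' and its fields are hypothetical stand-ins for the real
// Node/Block API, shown only to make the invariant concrete.
#include <vector>
struct ToyNode {
  std::vector<ToyNode*> prec;   // precedence edges honored by the scheduler
  int block;                    // id of the basic block holding this node
};
static void toy_add_anti_dependence(ToyNode* load, ToyNode* store) {
  if (load->block == store->block) {
    load->prec.push_back(store);  // force 'load' to schedule before 'store'
  }
}
// ---------------------------------------------------------------------------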
587 // The relevant stores "nearby" the load consist of a tree rooted
588 // at initial_mem, with internal nodes of type MergeMem.
589 // Therefore, the branches visited by the worklist are of this form:
590 // initial_mem -> (MergeMem ->)* store
591 // The anti-dependence constraints apply only to the fringe of this tree.
592
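// --- Editorial sketch (not part of the original source) --------------------
// The shape of the search below: walk from initial_mem through MergeMem
// internal nodes to the fringe of stores, deduplicating MergeMems so a
// diamond of memory states is not expanded twice (Phis may repeat). 'ToyMem'
// is a hypothetical stand-in for Node.
#include <algorithm>
#include <vector>
struct ToyMem {
  bool is_merge;                 // internal node (MergeMem) vs. fringe store
  std::vector<ToyMem*> outs;     // uses of this memory state
};
static void toy_visit_fringe(ToyMem* initial_mem, std::vector<ToyMem*>& fringe) {
  std::vector<ToyMem*> work{initial_mem}, visited{initial_mem};
  while (!work.empty()) {
    ToyMem* n = work.back(); work.pop_back();
    if (n == initial_mem || n->is_merge) {      // internal: expand its uses
      for (ToyMem* u : n->outs) {
        if (u->is_merge) {                      // dedup MergeMems only
          if (std::find(visited.begin(), visited.end(), u) != visited.end())
            continue;
          visited.push_back(u);
        }
        work.push_back(u);
      }
    } else {
      fringe.push_back(n);                      // a 'possible-def' store
    }
  }
}
// ---------------------------------------------------------------------------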
593 Node* initial_mem = load->in(MemNode::Memory);
594 worklist_store.push(initial_mem);
595 worklist_visited.push(initial_mem);
596 worklist_mem.push(NULL);
597 while (worklist_store.size() > 0) {
598 // Examine a nearby store to see if it might interfere with our load.
599 Node* mem = worklist_mem.pop();
600 Node* store = worklist_store.pop();
601 uint op = store->Opcode();
602
603 // MergeMems do not directly have anti-deps.
604 // Treat them as internal nodes in a forward tree of memory states,
605 // the leaves of which are each a 'possible-def'.
606 if (store == initial_mem // root (exclusive) of tree we are searching
607 || op == Op_MergeMem // internal node of tree we are searching
608 ) {
609 mem = store; // It's not a possibly interfering store.
610 if (store == initial_mem)
611 initial_mem = NULL; // only process initial memory once
612
613 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
614 store = mem->fast_out(i);
615 if (store->is_MergeMem()) {
616 // Be sure we don't get into combinatorial problems.
617 // (Allow phis to be repeated; they can merge two relevant states.)
618 uint j = worklist_visited.size();
619 for (; j > 0; j--) {
620 if (worklist_visited.at(j-1) == store) break;
621 }
622 if (j > 0) continue; // already on work list; do not repeat
623 worklist_visited.push(store);
624 }
625 worklist_mem.push(mem);
626 worklist_store.push(store);
627 }
628 continue;
629 }
630
631 if (op == Op_MachProj || op == Op_Catch) continue;
632 if (store->needs_anti_dependence_check()) continue; // not really a store
633
634 // Compute the alias index. Loads and stores with different alias
635 // indices do not need anti-dependence edges. Wide MemBars are
636 // anti-dependent on everything (except immutable memories).
637 const TypePtr* adr_type = store->adr_type();
638 if (!C->can_alias(adr_type, load_alias_idx)) continue;
639
640 // Most slow-path runtime calls do NOT modify Java memory, but
641 // they can block and so write Raw memory.
642 if (store->is_Mach()) {
643 MachNode* mstore = store->as_Mach();
644 if (load_alias_idx != Compile::AliasIdxRaw) {
645 // Check for call into the runtime using the Java calling
646 // convention (and from there into a wrapper); it has no
647 // _method. Can't do this optimization for Native calls because
648 // they CAN write to Java memory.
649 if (mstore->ideal_Opcode() == Op_CallStaticJava) {
650 assert(mstore->is_MachSafePoint(), "");
651 MachSafePointNode* ms = (MachSafePointNode*) mstore;
652 assert(ms->is_MachCallJava(), "");
653 MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
654 if (mcj->_method == NULL) {
655 // These runtime calls do not write to Java visible memory
656 // (other than Raw) and so do not require anti-dependence edges.
657 continue;
658 }
659 }
660 // Same for SafePoints: they read/write Raw but only read otherwise.
661 // This is basically a workaround for SafePoints only defining control
662 // instead of control + memory.
663 if (mstore->ideal_Opcode() == Op_SafePoint)
664 continue;
665 } else {
666 // Some raw memory, such as the load of "top" at an allocation,
667 // can be control dependent on the previous safepoint. See
668 // comments in GraphKit::allocate_heap() about control input.
669 // Inserting an anti-dep between such a safepoint and a use
670 // creates a cycle, and will cause a subsequent failure in
671 // local scheduling. (BugId 4919904)
672 // (%%% How can a control input be a safepoint and not a projection??)
673 if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
674 continue;
675 }
676 }
677
678 // Identify a block that the current load must be above,
679 // or else observe that 'store' is all the way up in the
680 // earliest legal block for 'load'. In the latter case,
681 // immediately insert an anti-dependence edge.
682 Block* store_block = get_block_for_node(store);
683 assert(store_block != NULL, "unused killing projections skipped above");
684
685 if (store->is_Phi()) {
686 // 'load' uses memory which is one (or more) of the Phi's inputs.
687 // It must be scheduled not before the Phi, but rather before
688 // each of the relevant Phi inputs.
689 //
690 // Instead of finding the LCA of all inputs to a Phi that match 'mem',
691 // we mark each corresponding predecessor block and do a combined
692 // hoisting operation later (raise_LCA_above_marks).
693 //
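// --- Editorial sketch (not part of the original source) --------------------
// What "mark each corresponding predecessor block" means for a memory Phi:
// only the Phi inputs equal to the memory state 'mem' used by 'load'
// contribute, and each such input is charged to the predecessor block on
// that control edge, to be hoisted over later by raise_LCA_above_marks.
// 'ToyPhi'/'ToyBlock' are hypothetical stand-ins for PhiNode/Block.
#include <vector>
struct ToyBlock { bool raise_mark = false; };
struct ToyPhi {
  std::vector<const void*> in;    // in[1..n] are memory inputs (in[0]: control)
  std::vector<ToyBlock*>   pred;  // pred[i] is the block feeding input i
};
static void toy_mark_phi_preds(ToyPhi* phi, const void* mem) {
  for (size_t i = 1; i < phi->in.size(); i++) {
    if (phi->in[i] == mem) {
      phi->pred[i]->raise_mark = true;  // consumed by raise_LCA_above_marks
    }
  }
}
// ---------------------------------------------------------------------------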
1210 if (self->is_top()) {
1211 // Top node goes in bb #2 with other constants.
1212 // It must be special-cased, because it has no out edges.
1213 early->add_inst(self);
1214 continue;
1215 }
1216
1217 // No uses, just terminate
1218 if (self->outcnt() == 0) {
1219 assert(self->is_MachProj(), "sanity");
1220 continue; // Must be a dead machine projection
1221 }
1222
1223 // If node is pinned in the block, then no scheduling can be done.
1224 if( self->pinned() ) // Pinned in block?
1225 continue;
1226
1227 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1228 if (mach) {
1229 switch (mach->ideal_Opcode()) {
1230 case Op_CreateEx:
1231 // Don't move exception creation
1232 early->add_inst(self);
1233 continue;
1234 break;
1235 case Op_CheckCastPP:
1236 // Don't move CheckCastPP nodes away from their input, if the input
1237 // is a rawptr (5071820).
1238 Node *def = self->in(1);
1239 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
1240 early->add_inst(self);
1241 #ifdef ASSERT
1242 _raw_oops.push(def);
1243 #endif
1244 continue;
1245 }
1246 break;
1247 }
1248 }
1249
1250 // Gather LCA of all uses
1251 Block *LCA = NULL;
1252 {
1253 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
1254 // For all uses, find LCA
1255 Node* use = self->fast_out(i);
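// --- Editorial sketch (not part of the original source) --------------------
// The LCA gathered over all uses is the usual dominator-tree meet: walk the
// deeper block up its immediate-dominator chain until both sides land on the
// same block. 'ToyBlk' is a hypothetical stand-in for Block; the real code
// folds this walk into helpers such as raise_LCA_above_use.
struct ToyBlk { ToyBlk* idom; unsigned dom_depth; };
static ToyBlk* toy_dom_lca(ToyBlk* a, ToyBlk* b) {
  if (a == nullptr) return b;                 // first use seeds the LCA
  while (a->dom_depth > b->dom_depth) a = a->idom;
  while (b->dom_depth > a->dom_depth) b = b->idom;
  while (a != b) { a = a->idom; b = b->idom; }
  return a;
}
// ---------------------------------------------------------------------------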
1280 // Retry with subsume_loads == false
1281 // If this is the first failure, the sentinel string will "stick"
1282 // to the Compile object, and the C2Compiler will see it and retry.
1283 C->record_failure(C2Compiler::retry_no_subsuming_loads());
1284 } else {
1285 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
1286 C->record_method_not_compilable("late schedule failed: incorrect graph");
1287 }
1288 return;
1289 }
1290
1291 // If there is no opportunity to hoist, then we're done.
1292 // In stress mode, try to hoist even the single operations.
1293 bool try_to_hoist = StressGCM || (LCA != early);
1294
1295 // Must clone guys stay next to use; no hoisting allowed.
1296 // Also cannot hoist guys that alter memory or are otherwise not
1297 // allocatable (hoisting can make a value live longer, leading to
1298 // anti and output dependency problems which are normally resolved
1299 // by the register allocator giving everyone a different register).
1300 if (mach != NULL && must_clone[mach->ideal_Opcode()])
1301 try_to_hoist = false;
1302
1303 Block* late = NULL;
1304 if (try_to_hoist) {
1305 // Now find the block with the least execution frequency.
1306 // Start at the latest schedule and work up to the earliest schedule
1307 // in the dominator tree. Thus the Node will dominate all its uses.
1308 late = hoist_to_cheaper_block(LCA, early, self);
1309 } else {
1310 // Just use the LCA of the uses.
1311 late = LCA;
1312 }
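// --- Editorial sketch (not part of the original source) --------------------
// The effect of hoist_to_cheaper_block above: candidate homes for the node
// lie on the idom chain from LCA up to 'early' (each such block dominates
// all uses), and a colder block by execution frequency is preferred. The
// real routine applies further heuristics; 'ToyB' is a hypothetical stand-in
// for Block.
struct ToyB { ToyB* idom; double freq; };
static ToyB* toy_hoist_to_cheaper(ToyB* LCA, ToyB* early) {
  ToyB* best = LCA;
  for (ToyB* b = LCA; b != nullptr; b = b->idom) {
    if (b->freq < best->freq) best = b;       // prefer colder blocks
    if (b == early) break;                    // cannot rise above 'early'
  }
  return best;
}
// ---------------------------------------------------------------------------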
1313
1314 // Put the node into target block
1315 schedule_node_into_block(self, late);
1316
1317 #ifdef ASSERT
1318 if (self->needs_anti_dependence_check()) {
1319 // since precedence edges are only inserted when we're sure they
1320 // are needed make sure that after placement in a block we don't
1803 BlockProbPair bpp(et, new_prob);
1804 _exits.at_put(i, bpp);
1805 }
1806
1807 // Save the total, but guard against unreasonable probability,
1808 // as the value is used to estimate the loop trip count.
1809 // An infinite trip count would blur relative block
1810 // frequencies.
1811 if (exits_sum > 1.0f) exits_sum = 1.0f;
1812 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
1813 _exit_prob = exits_sum;
1814 }
1815 }
1816
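// --- Editorial note (not part of the original source) ----------------------
// Why the clamp above matters: the loop trip count is estimated as roughly
// the reciprocal of the exit probability, so an exit probability of 0.1
// suggests about 10 iterations per loop entry, while an unclamped value near
// zero would imply a near-infinite count that swamps all other frequencies.
static double toy_trip_count_estimate(double exit_prob) {
  return 1.0 / exit_prob;   // e.g. 1.0 / 0.1 == 10 expected iterations
}
// ---------------------------------------------------------------------------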
1817 //------------------------------succ_prob-------------------------------------
1818 // Determine the probability of reaching successor 'i' from the receiver block.
1819 float Block::succ_prob(uint i) {
1820 int eidx = end_idx();
1821 Node *n = get_node(eidx); // Get ending Node
1822
1823 int op = n->Opcode();
1824 if (n->is_Mach()) {
1825 if (n->is_MachNullCheck()) {
1826 // Can only reach here if called after lcm. The original Op_If is gone,
1827 // so we attempt to infer the probability from one or both of the
1828 // successor blocks.
1829 assert(_num_succs == 2, "expecting 2 successors of a null check");
1830 // If either successor has only one predecessor, then the
1831 // probability estimate can be derived using the
1832 // relative frequency of the successor and this block.
1833 if (_succs[i]->num_preds() == 2) {
1834 return _succs[i]->_freq / _freq;
1835 } else if (_succs[1-i]->num_preds() == 2) {
1836 return 1 - (_succs[1-i]->_freq / _freq);
1837 } else {
1838 // Estimate using both successor frequencies
1839 float freq = _succs[i]->_freq;
1840 return freq / (freq + _succs[1-i]->_freq);
1841 }
1842 }
1843 op = n->as_Mach()->ideal_Opcode();
1844 }
1845
1846
1847 // Switch on branch type
1848 switch( op ) {
1849 case Op_CountedLoopEnd:
1850 case Op_If: {
1851 assert (i < 2, "just checking");
1852 // Conditionals pass on only part of their frequency
1853 float prob = n->as_MachIf()->_prob;
1854 assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
1855 // If succ[i] is the FALSE branch, invert path info
1856 if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) {
1857 return 1.0f - prob; // not taken
1858 } else {
1859 return prob; // taken
1860 }
1861 }
1862
1863 case Op_Jump:
1864 // Divide the frequency between all successors evenly
1865 return 1.0f/_num_succs;
1866
1867 case Op_Catch: {
1868 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1869 if (ci->_con == CatchProjNode::fall_through_index) {
1870 // Fall-thru path gets the lion's share.
1871 return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
1872 } else {
1873 // Presume exceptional paths are equally unlikely
1874 return PROB_UNLIKELY_MAG(5);
1875 }
1876 }
1877
1878 case Op_Root:
1879 case Op_Goto:
1880 // Pass frequency straight thru to target
1881 return 1.0f;
1882
1883 case Op_NeverBranch:
1884 return 0.0f;
1885
1886 case Op_TailCall:
1887 case Op_TailJump:
1888 case Op_Return:
1889 case Op_Halt:
1890 case Op_Rethrow:
1891 // Do not push out freq to root block
1892 return 0.0f;
1893
1894 default:
1895 ShouldNotReachHere();
1896 }
1897
1898 return 0.0f;
1899 }
1900
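// --- Editorial sketch (not part of the original source) --------------------
// How succ_prob() is typically consumed: a block's frequency is split among
// its successors in proportion to the returned probabilities, which should
// sum to at most 1 across one block's successors. 'ToySucc' and
// toy_split_freq are hypothetical illustrations.
#include <vector>
struct ToySucc { double freq = 0.0; };
static void toy_split_freq(double block_freq,
                           const std::vector<float>& probs,
                           std::vector<ToySucc>& succs) {
  for (size_t i = 0; i < succs.size() && i < probs.size(); i++) {
    succs[i].freq += block_freq * probs[i];   // successor's share of the flow
  }
}
// ---------------------------------------------------------------------------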
1901 //------------------------------num_fall_throughs-----------------------------
1902 // Return the number of fall-through candidates for a block
1903 int Block::num_fall_throughs() {
1904 int eidx = end_idx();
1905 Node *n = get_node(eidx); // Get ending Node
1906
1907 int op = n->Opcode();
1908 if (n->is_Mach()) {
1909 if (n->is_MachNullCheck()) {
1910 // In theory, either side can fall through; for simplicity's sake,
1911 // let's say only the false branch can, for now.
1912 return 1;
1913 }
1914 op = n->as_Mach()->ideal_Opcode();
1915 }
1916
1917 // Switch on branch type
1918 switch( op ) {
1919 case Op_CountedLoopEnd:
1920 case Op_If:
1921 return 2;
1922
1923 case Op_Root:
1924 case Op_Goto:
1925 return 1;
1926
1927 case Op_Catch: {
1928 for (uint i = 0; i < _num_succs; i++) {
1929 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1930 if (ci->_con == CatchProjNode::fall_through_index) {
1931 return 1;
1932 }
1933 }
1934 return 0;
1935 }
1936
1937 case Op_Jump:
1938 case Op_NeverBranch:
1939 case Op_TailCall:
1940 case Op_TailJump:
1941 case Op_Return:
1942 case Op_Halt:
1943 case Op_Rethrow:
1944 return 0;
1945
1946 default:
1947 ShouldNotReachHere();
1948 }
1949
1950 return 0;
1951 }
1952
1953 //------------------------------succ_fall_through-----------------------------
1954 // Return true if a specific successor could be fall-through target.
1955 bool Block::succ_fall_through(uint i) {
1956 int eidx = end_idx();
1957 Node *n = get_node(eidx); // Get ending Node
1958
1959 int op = n->Opcode();
1960 if (n->is_Mach()) {
1961 if (n->is_MachNullCheck()) {
1962 // In theory, either side can fall through; for simplicity's sake,
1963 // let's say only the false branch can, for now.
1964 return get_node(i + eidx + 1)->Opcode() == Op_IfFalse;
1965 }
1966 op = n->as_Mach()->ideal_Opcode();
1967 }
1968
1969 // Switch on branch type
1970 switch( op ) {
1971 case Op_CountedLoopEnd:
1972 case Op_If:
1973 case Op_Root:
1974 case Op_Goto:
1975 return true;
1976
1977 case Op_Catch: {
1978 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1979 return ci->_con == CatchProjNode::fall_through_index;
1980 }
1981
1982 case Op_Jump:
1983 case Op_NeverBranch:
1984 case Op_TailCall:
1985 case Op_TailJump:
1986 case Op_Return:
1987 case Op_Halt:
1988 case Op_Rethrow:
1989 return false;
1990
1991 default:
1992 ShouldNotReachHere();
1993 }
1994
1995 return false;
1996 }
1997
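// --- Editorial sketch (not part of the original source) --------------------
// How the queries above combine during block layout: a successor may be
// placed textually after this block only if it is a legal fall-through, and
// among legal candidates the more probable one saves the most taken
// branches. 'ToyCand' and toy_pick_fall_through are hypothetical.
#include <vector>
struct ToyCand { bool can_fall_through; float prob; };
static int toy_pick_fall_through(const std::vector<ToyCand>& succ) {
  int best = -1;
  float best_prob = -1.0f;
  for (size_t i = 0; i < succ.size(); i++) {
    if (succ[i].can_fall_through && succ[i].prob > best_prob) {
      best = (int)i;
      best_prob = succ[i].prob;
    }
  }
  return best;                                // -1 if no legal fall-through
}
// ---------------------------------------------------------------------------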
1998 //------------------------------update_uncommon_branch------------------------
1999 // Update the probability of a two-branch to be uncommon
2000 void Block::update_uncommon_branch(Block* ub) {
2001 int eidx = end_idx();
2002 Node *n = get_node(eidx); // Get ending Node
2003
2004 int op = n->as_Mach()->ideal_Opcode();
2005
2006 assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If");
2007 assert(num_fall_throughs() == 2, "must be a two-way branch block");
2008
2009 // Which successor is ub?
2010 uint s;
2011 for (s = 0; s < _num_succs; s++) {
2012 if (_succs[s] == ub) break;
2013 }
2014 assert(s < 2, "uncommon successor must be found");
2015
2016 // If ub is the true path, make the probability small; else
2017 // ub is the false path, so make the probability large.
2018 bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse);
2019
2020 // Get existing probability
2021 float p = n->as_MachIf()->_prob;
2022
2023 if (invert) p = 1.0f - p;
2024 if (p > PROB_MIN) {
2025 p = PROB_MIN;
2026 }
2027 if (invert) p = 1.0f - p;
2028
2029 n->as_MachIf()->_prob = p;
2030 }
2031
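// --- Editorial sketch (not part of the original source) --------------------
// The invert/clamp/invert dance above in one reusable form: cap the
// probability of reaching the uncommon successor at prob_min, regardless of
// whether that successor hangs off the true or the false projection.
static float toy_clamp_uncommon(float p, bool uncommon_on_false_side,
                                float prob_min) {
  if (uncommon_on_false_side) p = 1.0f - p;   // make p = P(uncommon side)
  if (p > prob_min) p = prob_min;             // cap the uncommon path
  if (uncommon_on_false_side) p = 1.0f - p;   // restore the branch's sense
  return p;
}
// ---------------------------------------------------------------------------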
2032 //------------------------------update_succ_freq-------------------------------
2033 // Update the appropriate frequency associated with block 'b', a successor of
2034 // a block in this loop.
2035 void CFGLoop::update_succ_freq(Block* b, double freq) {
2036 if (b->_loop == this) {
2037 if (b == head()) {
2038 // back branch within the loop
|
504 assert(load->needs_anti_dependence_check(), "must be a load of some sort");
505 assert(LCA != NULL, "");
506 DEBUG_ONLY(Block* LCA_orig = LCA);
507
508 // Compute the alias index. Loads and stores with different alias indices
509 // do not need anti-dependence edges.
510 int load_alias_idx = C->get_alias_index(load->adr_type());
511 #ifdef ASSERT
512 if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
513 (PrintOpto || VerifyAliases ||
514 (PrintMiscellaneous && (WizardMode || Verbose)))) {
515 // Load nodes should not consume all of memory.
516 // Reporting a bottom type indicates a bug in adlc.
517 // If some particular type of node validly consumes all of memory,
518 // sharpen the preceding "if" to exclude it, so we can catch bugs here.
519 tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
520 load->dump(2);
521 if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
522 }
523 #endif
524 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrComp),
525 "String compare is only known 'load' that does not conflict with any stores");
526 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrEquals),
527 "String equals is a 'load' that does not conflict with any stores");
528 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrIndexOf),
529 "String indexOf is a 'load' that does not conflict with any stores");
530 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrIndexOfChar),
531 "String indexOfChar is a 'load' that does not conflict with any stores");
532 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_AryEq),
533 "Arrays equals is a 'load' that does not conflict with any stores");
534 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_HasNegatives),
535 "HasNegatives is a 'load' that does not conflict with any stores");
536
537 if (!C->alias_type(load_alias_idx)->is_rewritable()) {
538 // It is impossible to spoil this load by putting stores before it,
539 // because we know that the stores will never update the value
540 // which 'load' must witness.
541 return LCA;
542 }
543
544 node_idx_t load_index = load->_idx;
545
546 // Note the earliest legal placement of 'load', as determined by
547 // the unique point in the dom tree where all memory effects
548 // and other inputs are first available. (Computed by schedule_early.)
549 // For normal loads, 'early' is the shallowest place (dom graph wise)
550 // to look for anti-deps between this load and any store.
551 Block* early = get_block_for_node(load);
552
553 // If we are subsuming loads, compute an "early" block that only considers
554 // memory or address inputs. This block may be different than the
581 // Each of these stores is a possible definition of memory
582 // that 'load' needs to use. We need to force 'load'
583 // to occur before each such store. When the store is in
584 // the same block as 'load', we insert an anti-dependence
585 // edge load->store.
586
587 // The relevant stores "nearby" the load consist of a tree rooted
588 // at initial_mem, with internal nodes of type MergeMem.
589 // Therefore, the branches visited by the worklist are of this form:
590 // initial_mem -> (MergeMem ->)* store
591 // The anti-dependence constraints apply only to the fringe of this tree.
592
593 Node* initial_mem = load->in(MemNode::Memory);
594 worklist_store.push(initial_mem);
595 worklist_visited.push(initial_mem);
596 worklist_mem.push(NULL);
597 while (worklist_store.size() > 0) {
598 // Examine a nearby store to see if it might interfere with our load.
599 Node* mem = worklist_mem.pop();
600 Node* store = worklist_store.pop();
601 Opcodes op = store->Opcode();
602
603 // MergeMems do not directly have anti-deps.
604 // Treat them as internal nodes in a forward tree of memory states,
605 // the leaves of which are each a 'possible-def'.
606 if (store == initial_mem // root (exclusive) of tree we are searching
607 || op == Opcodes::Op_MergeMem // internal node of tree we are searching
608 ) {
609 mem = store; // It's not a possibly interfering store.
610 if (store == initial_mem)
611 initial_mem = NULL; // only process initial memory once
612
613 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
614 store = mem->fast_out(i);
615 if (store->is_MergeMem()) {
616 // Be sure we don't get into combinatorial problems.
617 // (Allow phis to be repeated; they can merge two relevant states.)
618 uint j = worklist_visited.size();
619 for (; j > 0; j--) {
620 if (worklist_visited.at(j-1) == store) break;
621 }
622 if (j > 0) continue; // already on work list; do not repeat
623 worklist_visited.push(store);
624 }
625 worklist_mem.push(mem);
626 worklist_store.push(store);
627 }
628 continue;
629 }
630
631 if (op == Opcodes::Op_MachProj || op == Opcodes::Op_Catch) continue;
632 if (store->needs_anti_dependence_check()) continue; // not really a store
633
634 // Compute the alias index. Loads and stores with different alias
635 // indices do not need anti-dependence edges. Wide MemBars are
636 // anti-dependent on everything (except immutable memories).
637 const TypePtr* adr_type = store->adr_type();
638 if (!C->can_alias(adr_type, load_alias_idx)) continue;
639
640 // Most slow-path runtime calls do NOT modify Java memory, but
641 // they can block and so write Raw memory.
642 if (store->is_Mach()) {
643 MachNode* mstore = store->as_Mach();
644 if (load_alias_idx != Compile::AliasIdxRaw) {
645 // Check for call into the runtime using the Java calling
646 // convention (and from there into a wrapper); it has no
647 // _method. Can't do this optimization for Native calls because
648 // they CAN write to Java memory.
649 if (mstore->ideal_Opcode() == Opcodes::Op_CallStaticJava) {
650 assert(mstore->is_MachSafePoint(), "");
651 MachSafePointNode* ms = (MachSafePointNode*) mstore;
652 assert(ms->is_MachCallJava(), "");
653 MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
654 if (mcj->_method == NULL) {
655 // These runtime calls do not write to Java visible memory
656 // (other than Raw) and so do not require anti-dependence edges.
657 continue;
658 }
659 }
660 // Same for SafePoints: they read/write Raw but only read otherwise.
661 // This is basically a workaround for SafePoints only defining control
662 // instead of control + memory.
663 if (mstore->ideal_Opcode() == Opcodes::Op_SafePoint)
664 continue;
665 } else {
666 // Some raw memory, such as the load of "top" at an allocation,
667 // can be control dependent on the previous safepoint. See
668 // comments in GraphKit::allocate_heap() about control input.
669 // Inserting an anti-dep between such a safepoint and a use
670 // creates a cycle, and will cause a subsequent failure in
671 // local scheduling. (BugId 4919904)
672 // (%%% How can a control input be a safepoint and not a projection??)
673 if (mstore->ideal_Opcode() == Opcodes::Op_SafePoint && load->in(0) == mstore)
674 continue;
675 }
676 }
677
678 // Identify a block that the current load must be above,
679 // or else observe that 'store' is all the way up in the
680 // earliest legal block for 'load'. In the latter case,
681 // immediately insert an anti-dependence edge.
682 Block* store_block = get_block_for_node(store);
683 assert(store_block != NULL, "unused killing projections skipped above");
684
685 if (store->is_Phi()) {
686 // 'load' uses memory which is one (or more) of the Phi's inputs.
687 // It must be scheduled not before the Phi, but rather before
688 // each of the relevant Phi inputs.
689 //
690 // Instead of finding the LCA of all inputs to a Phi that match 'mem',
691 // we mark each corresponding predecessor block and do a combined
692 // hoisting operation later (raise_LCA_above_marks).
693 //
1210 if (self->is_top()) {
1211 // Top node goes in bb #2 with other constants.
1212 // It must be special-cased, because it has no out edges.
1213 early->add_inst(self);
1214 continue;
1215 }
1216
1217 // No uses, just terminate
1218 if (self->outcnt() == 0) {
1219 assert(self->is_MachProj(), "sanity");
1220 continue; // Must be a dead machine projection
1221 }
1222
1223 // If node is pinned in the block, then no scheduling can be done.
1224 if( self->pinned() ) // Pinned in block?
1225 continue;
1226
1227 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1228 if (mach) {
1229 switch (mach->ideal_Opcode()) {
1230 case Opcodes::Op_CreateEx:
1231 // Don't move exception creation
1232 early->add_inst(self);
1233 continue;
1234 break;
1235 case Opcodes::Op_CheckCastPP:
1236 // Don't move CheckCastPP nodes away from their input, if the input
1237 // is a rawptr (5071820).
1238 Node *def = self->in(1);
1239 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
1240 early->add_inst(self);
1241 #ifdef ASSERT
1242 _raw_oops.push(def);
1243 #endif
1244 continue;
1245 }
1246 break;
1247 }
1248 }
1249
1250 // Gather LCA of all uses
1251 Block *LCA = NULL;
1252 {
1253 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
1254 // For all uses, find LCA
1255 Node* use = self->fast_out(i);
1280 // Retry with subsume_loads == false
1281 // If this is the first failure, the sentinel string will "stick"
1282 // to the Compile object, and the C2Compiler will see it and retry.
1283 C->record_failure(C2Compiler::retry_no_subsuming_loads());
1284 } else {
1285 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
1286 C->record_method_not_compilable("late schedule failed: incorrect graph");
1287 }
1288 return;
1289 }
1290
1291 // If there is no opportunity to hoist, then we're done.
1292 // In stress mode, try to hoist even the single operations.
1293 bool try_to_hoist = StressGCM || (LCA != early);
1294
1295 // Must clone guys stay next to use; no hoisting allowed.
1296 // Also cannot hoist guys that alter memory or are otherwise not
1297 // allocatable (hoisting can make a value live longer, leading to
1298 // anti and output dependency problems which are normally resolved
1299 // by the register allocator giving everyone a different register).
1300 if (mach != NULL && must_clone[static_cast<uint>(mach->ideal_Opcode())])
1301 try_to_hoist = false;
1302
1303 Block* late = NULL;
1304 if (try_to_hoist) {
1305 // Now find the block with the least execution frequency.
1306 // Start at the latest schedule and work up to the earliest schedule
1307 // in the dominator tree. Thus the Node will dominate all its uses.
1308 late = hoist_to_cheaper_block(LCA, early, self);
1309 } else {
1310 // Just use the LCA of the uses.
1311 late = LCA;
1312 }
1313
1314 // Put the node into target block
1315 schedule_node_into_block(self, late);
1316
1317 #ifdef ASSERT
1318 if (self->needs_anti_dependence_check()) {
1319 // since precedence edges are only inserted when we're sure they
1320 // are needed make sure that after placement in a block we don't
1803 BlockProbPair bpp(et, new_prob);
1804 _exits.at_put(i, bpp);
1805 }
1806
1807 // Save the total, but guard against unreasonable probability,
1808 // as the value is used to estimate the loop trip count.
1809 // An infinite trip count would blur relative block
1810 // frequencies.
1811 if (exits_sum > 1.0f) exits_sum = 1.0f;
1812 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
1813 _exit_prob = exits_sum;
1814 }
1815 }
1816
1817 //------------------------------succ_prob-------------------------------------
1818 // Determine the probability of reaching successor 'i' from the receiver block.
1819 float Block::succ_prob(uint i) {
1820 int eidx = end_idx();
1821 Node *n = get_node(eidx); // Get ending Node
1822
1823 Opcodes op = n->Opcode();
1824 if (n->is_Mach()) {
1825 if (n->is_MachNullCheck()) {
1826 // Can only reach here if called after lcm. The original Op_If is gone,
1827 // so we attempt to infer the probability from one or both of the
1828 // successor blocks.
1829 assert(_num_succs == 2, "expecting 2 successors of a null check");
1830 // If either successor has only one predecessor, then the
1831 // probability estimate can be derived using the
1832 // relative frequency of the successor and this block.
1833 if (_succs[i]->num_preds() == 2) {
1834 return _succs[i]->_freq / _freq;
1835 } else if (_succs[1-i]->num_preds() == 2) {
1836 return 1 - (_succs[1-i]->_freq / _freq);
1837 } else {
1838 // Estimate using both successor frequencies
1839 float freq = _succs[i]->_freq;
1840 return freq / (freq + _succs[1-i]->_freq);
1841 }
1842 }
1843 op = n->as_Mach()->ideal_Opcode();
1844 }
1845
1846
1847 // Switch on branch type
1848 switch( op ) {
1849 case Opcodes::Op_CountedLoopEnd:
1850 case Opcodes::Op_If: {
1851 assert (i < 2, "just checking");
1852 // Conditionals pass on only part of their frequency
1853 float prob = n->as_MachIf()->_prob;
1854 assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
1855 // If succ[i] is the FALSE branch, invert path info
1856 if( get_node(i + eidx + 1)->Opcode() == Opcodes::Op_IfFalse ) {
1857 return 1.0f - prob; // not taken
1858 } else {
1859 return prob; // taken
1860 }
1861 }
1862
1863 case Opcodes::Op_Jump:
1864 // Divide the frequency between all successors evenly
1865 return 1.0f/_num_succs;
1866
1867 case Opcodes::Op_Catch: {
1868 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1869 if (ci->_con == CatchProjNode::fall_through_index) {
1870 // Fall-thru path gets the lion's share.
1871 return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
1872 } else {
1873 // Presume exceptional paths are equally unlikely
1874 return PROB_UNLIKELY_MAG(5);
1875 }
1876 }
1877
1878 case Opcodes::Op_Root:
1879 case Opcodes::Op_Goto:
1880 // Pass frequency straight thru to target
1881 return 1.0f;
1882
1883 case Opcodes::Op_NeverBranch:
1884 return 0.0f;
1885
1886 case Opcodes::Op_TailCall:
1887 case Opcodes::Op_TailJump:
1888 case Opcodes::Op_Return:
1889 case Opcodes::Op_Halt:
1890 case Opcodes::Op_Rethrow:
1891 // Do not push out freq to root block
1892 return 0.0f;
1893
1894 default:
1895 ShouldNotReachHere();
1896 }
1897
1898 return 0.0f;
1899 }
1900
1901 //------------------------------num_fall_throughs-----------------------------
1902 // Return the number of fall-through candidates for a block
1903 int Block::num_fall_throughs() {
1904 int eidx = end_idx();
1905 Node *n = get_node(eidx); // Get ending Node
1906
1907 Opcodes op = n->Opcode();
1908 if (n->is_Mach()) {
1909 if (n->is_MachNullCheck()) {
1910 // In theory, either side can fall through; for simplicity's sake,
1911 // let's say only the false branch can, for now.
1912 return 1;
1913 }
1914 op = n->as_Mach()->ideal_Opcode();
1915 }
1916
1917 // Switch on branch type
1918 switch( op ) {
1919 case Opcodes::Op_CountedLoopEnd:
1920 case Opcodes::Op_If:
1921 return 2;
1922
1923 case Opcodes::Op_Root:
1924 case Opcodes::Op_Goto:
1925 return 1;
1926
1927 case Opcodes::Op_Catch: {
1928 for (uint i = 0; i < _num_succs; i++) {
1929 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1930 if (ci->_con == CatchProjNode::fall_through_index) {
1931 return 1;
1932 }
1933 }
1934 return 0;
1935 }
1936
1937 case Opcodes::Op_Jump:
1938 case Opcodes::Op_NeverBranch:
1939 case Opcodes::Op_TailCall:
1940 case Opcodes::Op_TailJump:
1941 case Opcodes::Op_Return:
1942 case Opcodes::Op_Halt:
1943 case Opcodes::Op_Rethrow:
1944 return 0;
1945
1946 default:
1947 ShouldNotReachHere();
1948 }
1949
1950 return 0;
1951 }
1952
1953 //------------------------------succ_fall_through-----------------------------
1954 // Return true if a specific successor could be fall-through target.
1955 bool Block::succ_fall_through(uint i) {
1956 int eidx = end_idx();
1957 Node *n = get_node(eidx); // Get ending Node
1958
1959 Opcodes op = n->Opcode();
1960 if (n->is_Mach()) {
1961 if (n->is_MachNullCheck()) {
1962 // In theory, either side can fall through; for simplicity's sake,
1963 // let's say only the false branch can, for now.
1964 return get_node(i + eidx + 1)->Opcode() == Opcodes::Op_IfFalse;
1965 }
1966 op = n->as_Mach()->ideal_Opcode();
1967 }
1968
1969 // Switch on branch type
1970 switch( op ) {
1971 case Opcodes::Op_CountedLoopEnd:
1972 case Opcodes::Op_If:
1973 case Opcodes::Op_Root:
1974 case Opcodes::Op_Goto:
1975 return true;
1976
1977 case Opcodes::Op_Catch: {
1978 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1979 return ci->_con == CatchProjNode::fall_through_index;
1980 }
1981
1982 case Opcodes::Op_Jump:
1983 case Opcodes::Op_NeverBranch:
1984 case Opcodes::Op_TailCall:
1985 case Opcodes::Op_TailJump:
1986 case Opcodes::Op_Return:
1987 case Opcodes::Op_Halt:
1988 case Opcodes::Op_Rethrow:
1989 return false;
1990
1991 default:
1992 ShouldNotReachHere();
1993 }
1994
1995 return false;
1996 }
1997
1998 //------------------------------update_uncommon_branch------------------------
1999 // Update the probability of a two-branch to be uncommon
2000 void Block::update_uncommon_branch(Block* ub) {
2001 int eidx = end_idx();
2002 Node *n = get_node(eidx); // Get ending Node
2003
2004 Opcodes op = n->as_Mach()->ideal_Opcode();
2005
2006 assert(op == Opcodes::Op_CountedLoopEnd || op == Opcodes::Op_If, "must be an If");
2007 assert(num_fall_throughs() == 2, "must be a two-way branch block");
2008
2009 // Which successor is ub?
2010 uint s;
2011 for (s = 0; s < _num_succs; s++) {
2012 if (_succs[s] == ub) break;
2013 }
2014 assert(s < 2, "uncommon successor must be found");
2015
2016 // If ub is the true path, make the probability small; else
2017 // ub is the false path, so make the probability large.
2018 bool invert = (get_node(s + eidx + 1)->Opcode() == Opcodes::Op_IfFalse);
2019
2020 // Get existing probability
2021 float p = n->as_MachIf()->_prob;
2022
2023 if (invert) p = 1.0f - p;
2024 if (p > PROB_MIN) {
2025 p = PROB_MIN;
2026 }
2027 if (invert) p = 1.0f - p;
2028
2029 n->as_MachIf()->_prob = p;
2030 }
2031
2032 //------------------------------update_succ_freq-------------------------------
2033 // Update the appropriate frequency associated with block 'b', a successor of
2034 // a block in this loop.
2035 void CFGLoop::update_succ_freq(Block* b, double freq) {
2036 if (b->_loop == this) {
2037 if (b == head()) {
2038 // back branch within the loop
|