src/share/vm/opto/gcm.cpp

 504   assert(load->needs_anti_dependence_check(), "must be a load of some sort");
 505   assert(LCA != NULL, "");
 506   DEBUG_ONLY(Block* LCA_orig = LCA);
 507 
 508   // Compute the alias index.  Loads and stores with different alias indices
 509   // do not need anti-dependence edges.
 510   int load_alias_idx = C->get_alias_index(load->adr_type());
 511 #ifdef ASSERT
 512   if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
 513       (PrintOpto || VerifyAliases ||
 514        PrintMiscellaneous && (WizardMode || Verbose))) {
 515     // Load nodes should not consume all of memory.
 516     // Reporting a bottom type indicates a bug in adlc.
 517     // If some particular type of node validly consumes all of memory,
 518     // sharpen the preceding "if" to exclude it, so we can catch bugs here.
 519     tty->print_cr("*** Possible Anti-Dependence Bug:  Load consumes all of memory.");
 520     load->dump(2);
 521     if (VerifyAliases)  assert(load_alias_idx != Compile::AliasIdxBot, "");
 522   }
 523 #endif
 524   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
 525          "String compare is the only known 'load' that does not conflict with any stores");
 526   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
 527          "String equals is a 'load' that does not conflict with any stores");
 528   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
 529          "String indexOf is a 'load' that does not conflict with any stores");
 530   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOfChar),
 531          "String indexOfChar is a 'load' that does not conflict with any stores");
 532   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
 533          "Arrays equals is a 'load' that does not conflict with any stores");
 534   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_HasNegatives),
 535          "HasNegatives is a 'load' that does not conflict with any stores");
 536 
 537   if (!C->alias_type(load_alias_idx)->is_rewritable()) {
 538     // It is impossible to spoil this load by putting stores before it,
 539     // because we know that the stores will never update the value
 540     // which 'load' must witness.
 541     return LCA;
 542   }
 543 
 544   node_idx_t load_index = load->_idx;
 545 
 546   // Note the earliest legal placement of 'load', as determined by
 547   // the unique point in the dom tree where all memory effects
 548   // and other inputs are first available.  (Computed by schedule_early.)
 549   // For normal loads, 'early' is the shallowest place (dom graph wise)
 550   // to look for anti-deps between this load and any store.
 551   Block* early = get_block_for_node(load);
 552 
 553   // If we are subsuming loads, compute an "early" block that only considers
 554   // memory or address inputs. This block may be different than the


 581   // Each of these stores is a possible definition of memory
 582   // that 'load' needs to use.  We need to force 'load'
 583   // to occur before each such store.  When the store is in
 584   // the same block as 'load', we insert an anti-dependence
 585   // edge load->store.
 586 
 587   // The relevant stores "nearby" the load consist of a tree rooted
 588   // at initial_mem, with internal nodes of type MergeMem.
 589   // Therefore, the branches visited by the worklist are of this form:
 590   //    initial_mem -> (MergeMem ->)* store
 591   // The anti-dependence constraints apply only to the fringe of this tree.
 592 
 593   Node* initial_mem = load->in(MemNode::Memory);
 594   worklist_store.push(initial_mem);
 595   worklist_visited.push(initial_mem);
 596   worklist_mem.push(NULL);
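       // The two worklists advance in lockstep: worklist_mem.at(k) records the memory
       // node (initial_mem or a MergeMem) through which worklist_store.at(k) was reached,
       // and the Phi handling below uses that node to select the relevant Phi inputs.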
 597   while (worklist_store.size() > 0) {
 598     // Examine a nearby store to see if it might interfere with our load.
 599     Node* mem   = worklist_mem.pop();
 600     Node* store = worklist_store.pop();
 601     uint op = store->Opcode();
 602 
 603     // MergeMems do not directly have anti-deps.
 604     // Treat them as internal nodes in a forward tree of memory states,
 605     // the leaves of which are each a 'possible-def'.
 606     if (store == initial_mem    // root (exclusive) of tree we are searching
 607         || op == Op_MergeMem    // internal node of tree we are searching
 608         ) {
 609       mem = store;   // It's not a possibly interfering store.
 610       if (store == initial_mem)
 611         initial_mem = NULL;  // only process initial memory once
 612 
 613       for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
 614         store = mem->fast_out(i);
 615         if (store->is_MergeMem()) {
 616           // Be sure we don't get into combinatorial problems.
 617           // (Allow phis to be repeated; they can merge two relevant states.)
 618           uint j = worklist_visited.size();
 619           for (; j > 0; j--) {
 620             if (worklist_visited.at(j-1) == store)  break;
 621           }
 622           if (j > 0)  continue; // already on work list; do not repeat
 623           worklist_visited.push(store);
 624         }
 625         worklist_mem.push(mem);
 626         worklist_store.push(store);
 627       }
 628       continue;
 629     }
 630 
 631     if (op == Op_MachProj || op == Op_Catch)   continue;
 632     if (store->needs_anti_dependence_check())  continue;  // not really a store
 633 
 634     // Compute the alias index.  Loads and stores with different alias
 635     // indices do not need anti-dependence edges.  Wide MemBar's are
 636     // anti-dependent on everything (except immutable memories).
 637     const TypePtr* adr_type = store->adr_type();
 638     if (!C->can_alias(adr_type, load_alias_idx))  continue;
 639 
 640     // Most slow-path runtime calls do NOT modify Java memory, but
 641     // they can block and so write Raw memory.
 642     if (store->is_Mach()) {
 643       MachNode* mstore = store->as_Mach();
 644       if (load_alias_idx != Compile::AliasIdxRaw) {
 645         // Check for call into the runtime using the Java calling
 646         // convention (and from there into a wrapper); it has no
 647         // _method.  Can't do this optimization for Native calls because
 648         // they CAN write to Java memory.
 649         if (mstore->ideal_Opcode() == Op_CallStaticJava) {
 650           assert(mstore->is_MachSafePoint(), "");
 651           MachSafePointNode* ms = (MachSafePointNode*) mstore;
 652           assert(ms->is_MachCallJava(), "");
 653           MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
 654           if (mcj->_method == NULL) {
 655             // These runtime calls do not write to Java visible memory
 656             // (other than Raw) and so do not require anti-dependence edges.
 657             continue;
 658           }
 659         }
 660         // Same for SafePoints: they read/write Raw but only read otherwise.
 661         // This is basically a workaround for SafePoints only defining control
 662         // instead of control + memory.
 663         if (mstore->ideal_Opcode() == Op_SafePoint)
 664           continue;
 665       } else {
 666         // Some raw memory, such as the load of "top" at an allocation,
 667         // can be control dependent on the previous safepoint. See
 668         // comments in GraphKit::allocate_heap() about control input.
 669         // Inserting an anti-dep between such a safepoint and a use
 670         // creates a cycle, and will cause a subsequent failure in
 671         // local scheduling.  (BugId 4919904)
 672         // (%%% How can a control input be a safepoint and not a projection??)
 673         if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
 674           continue;
 675       }
 676     }
 677 
 678     // Identify a block that the current load must be above,
 679     // or else observe that 'store' is all the way up in the
 680     // earliest legal block for 'load'.  In the latter case,
 681     // immediately insert an anti-dependence edge.
 682     Block* store_block = get_block_for_node(store);
 683     assert(store_block != NULL, "unused killing projections skipped above");
 684 
 685     if (store->is_Phi()) {
 686       // 'load' uses memory which is one (or more) of the Phi's inputs.
 687       // It must be scheduled not before the Phi, but rather before
 688       // each of the relevant Phi inputs.
 689       //
 690       // Instead of finding the LCA of all inputs to a Phi that match 'mem',
 691       // we mark each corresponding predecessor block and do a combined
 692       // hoisting operation later (raise_LCA_above_marks).
 693       //


1210     if (self->is_top()) {
1211       // Top node goes in bb #2 with other constants.
1212       // It must be special-cased, because it has no out edges.
1213       early->add_inst(self);
1214       continue;
1215     }
1216 
1217     // No uses, just terminate
1218     if (self->outcnt() == 0) {
1219       assert(self->is_MachProj(), "sanity");
1220       continue;                   // Must be a dead machine projection
1221     }
1222 
1223     // If node is pinned in the block, then no scheduling can be done.
1224     if( self->pinned() )          // Pinned in block?
1225       continue;
1226 
1227     MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1228     if (mach) {
1229       switch (mach->ideal_Opcode()) {
1230       case Op_CreateEx:
1231         // Don't move exception creation
1232         early->add_inst(self);
1233         continue;
1234         break;
1235       case Op_CheckCastPP:
1236         // Don't move CheckCastPP nodes away from their input, if the input
1237         // is a rawptr (5071820).
1238         Node *def = self->in(1);
1239         if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
1240           early->add_inst(self);
1241 #ifdef ASSERT
1242           _raw_oops.push(def);
1243 #endif
1244           continue;
1245         }
1246         break;
1247       }
1248     }
1249 
1250     // Gather LCA of all uses
1251     Block *LCA = NULL;
1252     {
1253       for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
1254         // For all uses, find LCA
1255         Node* use = self->fast_out(i);


1280         // Retry with subsume_loads == false
1281         // If this is the first failure, the sentinel string will "stick"
1282         // to the Compile object, and the C2Compiler will see it and retry.
1283         C->record_failure(C2Compiler::retry_no_subsuming_loads());
1284       } else {
1285         // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
1286         C->record_method_not_compilable("late schedule failed: incorrect graph");
1287       }
1288       return;
1289     }
1290 
1291     // If there is no opportunity to hoist, then we're done.
1292     // In stress mode, try to hoist even the single operations.
1293     bool try_to_hoist = StressGCM || (LCA != early);
1294 
1295     // Must clone guys stay next to use; no hoisting allowed.
1296     // Also cannot hoist guys that alter memory or are otherwise not
1297     // allocatable (hoisting can make a value live longer, leading to
1298     // anti and output dependency problems which are normally resolved
1299     // by the register allocator giving everyone a different register).
1300     if (mach != NULL && must_clone[mach->ideal_Opcode()])
1301       try_to_hoist = false;
1302 
1303     Block* late = NULL;
1304     if (try_to_hoist) {
1305       // Now find the block with the least execution frequency.
1306       // Start at the latest schedule and work up to the earliest schedule
1307       // in the dominator tree.  Thus the Node will dominate all its uses.
1308       late = hoist_to_cheaper_block(LCA, early, self);
1309     } else {
1310       // Just use the LCA of the uses.
1311       late = LCA;
1312     }
1313 
1314     // Put the node into target block
1315     schedule_node_into_block(self, late);
1316 
1317 #ifdef ASSERT
1318     if (self->needs_anti_dependence_check()) {
1319       // since precedence edges are only inserted when we're sure they
1320       // are needed make sure that after placement in a block we don't


1803       BlockProbPair bpp(et, new_prob);
1804       _exits.at_put(i, bpp);
1805     }
1806 
1807     // Save the total, but guard against unreasonable probability,
1808     // as the value is used to estimate the loop trip count.
1809     // An infinite trip count would blur relative block
1810     // frequencies.
1811     if (exits_sum > 1.0f) exits_sum = 1.0;
1812     if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
1813     _exit_prob = exits_sum;
1814   }
1815 }
1816 
1817 //------------------------------succ_prob-------------------------------------
1818 // Determine the probability of reaching successor 'i' from the receiver block.
1819 float Block::succ_prob(uint i) {
1820   int eidx = end_idx();
1821   Node *n = get_node(eidx);  // Get ending Node
1822 
1823   int op = n->Opcode();
1824   if (n->is_Mach()) {
1825     if (n->is_MachNullCheck()) {
1826       // Can only reach here if called after lcm. The original Op_If is gone,
1827       // so we attempt to infer the probability from one or both of the
1828       // successor blocks.
1829       assert(_num_succs == 2, "expecting 2 successors of a null check");
1830       // If either successor has only one predecessor, then the
1831       // probability estimate can be derived using the
1832       // relative frequency of the successor and this block.
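            // (num_preds() counts the head node's inputs, including slot 0, which is
            // not a CFG edge, so a count of 2 means the successor has exactly one real
            // predecessor: this block.)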
1833       if (_succs[i]->num_preds() == 2) {
1834         return _succs[i]->_freq / _freq;
1835       } else if (_succs[1-i]->num_preds() == 2) {
1836         return 1 - (_succs[1-i]->_freq / _freq);
1837       } else {
1838         // Estimate using both successor frequencies
1839         float freq = _succs[i]->_freq;
1840         return freq / (freq + _succs[1-i]->_freq);
1841       }
1842     }
1843     op = n->as_Mach()->ideal_Opcode();
1844   }
1845 
1846 
1847   // Switch on branch type
1848   switch( op ) {
1849   case Op_CountedLoopEnd:
1850   case Op_If: {
1851     assert (i < 2, "just checking");
1852     // Conditionals pass on only part of their frequency
1853     float prob  = n->as_MachIf()->_prob;
1854     assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
1855     // If succ[i] is the FALSE branch, invert path info
1856     if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) {
1857       return 1.0f - prob; // not taken
1858     } else {
1859       return prob; // taken
1860     }
1861   }
1862 
1863   case Op_Jump:
1864     // Divide the frequency between all successors evenly
1865     return 1.0f/_num_succs;
1866 
1867   case Op_Catch: {
1868     const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1869     if (ci->_con == CatchProjNode::fall_through_index) {
1870       // Fall-thru path gets the lion's share.
1871       return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
1872     } else {
1873       // Presume exceptional paths are equally unlikely
1874       return PROB_UNLIKELY_MAG(5);
1875     }
1876   }
1877 
1878   case Op_Root:
1879   case Op_Goto:
1880     // Pass frequency straight thru to target
1881     return 1.0f;
1882 
1883   case Op_NeverBranch:
1884     return 0.0f;
1885 
1886   case Op_TailCall:
1887   case Op_TailJump:
1888   case Op_Return:
1889   case Op_Halt:
1890   case Op_Rethrow:
1891     // Do not push out freq to root block
1892     return 0.0f;
1893 
1894   default:
1895     ShouldNotReachHere();
1896   }
1897 
1898   return 0.0f;
1899 }
1900 
1901 //------------------------------num_fall_throughs-----------------------------
1902 // Return the number of fall-through candidates for a block
1903 int Block::num_fall_throughs() {
1904   int eidx = end_idx();
1905   Node *n = get_node(eidx);  // Get ending Node
1906 
1907   int op = n->Opcode();
1908   if (n->is_Mach()) {
1909     if (n->is_MachNullCheck()) {
 1910       // In theory, either side can fall through; for simplicity's sake,
1911       // let's say only the false branch can now.
1912       return 1;
1913     }
1914     op = n->as_Mach()->ideal_Opcode();
1915   }
1916 
1917   // Switch on branch type
1918   switch( op ) {
1919   case Op_CountedLoopEnd:
1920   case Op_If:
1921     return 2;
1922 
1923   case Op_Root:
1924   case Op_Goto:
1925     return 1;
1926 
1927   case Op_Catch: {
1928     for (uint i = 0; i < _num_succs; i++) {
1929       const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1930       if (ci->_con == CatchProjNode::fall_through_index) {
1931         return 1;
1932       }
1933     }
1934     return 0;
1935   }
1936 
1937   case Op_Jump:
1938   case Op_NeverBranch:
1939   case Op_TailCall:
1940   case Op_TailJump:
1941   case Op_Return:
1942   case Op_Halt:
1943   case Op_Rethrow:
1944     return 0;
1945 
1946   default:
1947     ShouldNotReachHere();
1948   }
1949 
1950   return 0;
1951 }
1952 
1953 //------------------------------succ_fall_through-----------------------------
1954 // Return true if a specific successor could be fall-through target.
1955 bool Block::succ_fall_through(uint i) {
1956   int eidx = end_idx();
1957   Node *n = get_node(eidx);  // Get ending Node
1958 
1959   int op = n->Opcode();
1960   if (n->is_Mach()) {
1961     if (n->is_MachNullCheck()) {
 1962       // In theory, either side can fall through; for simplicity's sake,
1963       // let's say only the false branch can now.
1964       return get_node(i + eidx + 1)->Opcode() == Op_IfFalse;
1965     }
1966     op = n->as_Mach()->ideal_Opcode();
1967   }
1968 
1969   // Switch on branch type
1970   switch( op ) {
1971   case Op_CountedLoopEnd:
1972   case Op_If:
1973   case Op_Root:
1974   case Op_Goto:
1975     return true;
1976 
1977   case Op_Catch: {
1978     const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1979     return ci->_con == CatchProjNode::fall_through_index;
1980   }
1981 
1982   case Op_Jump:
1983   case Op_NeverBranch:
1984   case Op_TailCall:
1985   case Op_TailJump:
1986   case Op_Return:
1987   case Op_Halt:
1988   case Op_Rethrow:
1989     return false;
1990 
1991   default:
1992     ShouldNotReachHere();
1993   }
1994 
1995   return false;
1996 }
1997 
1998 //------------------------------update_uncommon_branch------------------------
1999 // Update the probability of a two-branch to be uncommon
2000 void Block::update_uncommon_branch(Block* ub) {
2001   int eidx = end_idx();
2002   Node *n = get_node(eidx);  // Get ending Node
2003 
2004   int op = n->as_Mach()->ideal_Opcode();
2005 
 2006   assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If");
2007   assert(num_fall_throughs() == 2, "must be a two way branch block");
2008 
2009   // Which successor is ub?
2010   uint s;
2011   for (s = 0; s <_num_succs; s++) {
2012     if (_succs[s] == ub) break;
2013   }
2014   assert(s < 2, "uncommon successor must be found");
2015 
 2016   // If ub is the true path, make the probability small; else
2017   // ub is the false path, and make the probability large
2018   bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse);
2019 
2020   // Get existing probability
2021   float p = n->as_MachIf()->_prob;
2022 
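        // Convert p to the probability of reaching ub, clamp it to at most PROB_MIN,
        // and then convert back to the taken-branch sense in which _prob is stored.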
2023   if (invert) p = 1.0 - p;
2024   if (p > PROB_MIN) {
2025     p = PROB_MIN;
2026   }
2027   if (invert) p = 1.0 - p;
2028 
2029   n->as_MachIf()->_prob = p;
2030 }
2031 
2032 //------------------------------update_succ_freq-------------------------------
2033 // Update the appropriate frequency associated with block 'b', a successor of
2034 // a block in this loop.
2035 void CFGLoop::update_succ_freq(Block* b, double freq) {
2036   if (b->_loop == this) {
2037     if (b == head()) {
2038       // back branch within the loop




 504   assert(load->needs_anti_dependence_check(), "must be a load of some sort");
 505   assert(LCA != NULL, "");
 506   DEBUG_ONLY(Block* LCA_orig = LCA);
 507 
 508   // Compute the alias index.  Loads and stores with different alias indices
 509   // do not need anti-dependence edges.
 510   int load_alias_idx = C->get_alias_index(load->adr_type());
 511 #ifdef ASSERT
 512   if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
 513       (PrintOpto || VerifyAliases ||
 514        PrintMiscellaneous && (WizardMode || Verbose))) {
 515     // Load nodes should not consume all of memory.
 516     // Reporting a bottom type indicates a bug in adlc.
 517     // If some particular type of node validly consumes all of memory,
 518     // sharpen the preceding "if" to exclude it, so we can catch bugs here.
 519     tty->print_cr("*** Possible Anti-Dependence Bug:  Load consumes all of memory.");
 520     load->dump(2);
 521     if (VerifyAliases)  assert(load_alias_idx != Compile::AliasIdxBot, "");
 522   }
 523 #endif
 524   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrComp),
 525          "String compare is the only known 'load' that does not conflict with any stores");
 526   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrEquals),
 527          "String equals is a 'load' that does not conflict with any stores");
 528   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrIndexOf),
 529          "String indexOf is a 'load' that does not conflict with any stores");
 530   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_StrIndexOfChar),
 531          "String indexOfChar is a 'load' that does not conflict with any stores");
 532   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_AryEq),
 533          "Arrays equals is a 'load' that does not conflict with any stores");
 534   assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Opcodes::Op_HasNegatives),
 535          "HasNegatives is a 'load' that does not conflict with any stores");
 536 
 537   if (!C->alias_type(load_alias_idx)->is_rewritable()) {
 538     // It is impossible to spoil this load by putting stores before it,
 539     // because we know that the stores will never update the value
 540     // which 'load' must witness.
 541     return LCA;
 542   }
 543 
 544   node_idx_t load_index = load->_idx;
 545 
 546   // Note the earliest legal placement of 'load', as determined by
 547   // the unique point in the dom tree where all memory effects
 548   // and other inputs are first available.  (Computed by schedule_early.)
 549   // For normal loads, 'early' is the shallowest place (dom graph wise)
 550   // to look for anti-deps between this load and any store.
 551   Block* early = get_block_for_node(load);
 552 
 553   // If we are subsuming loads, compute an "early" block that only considers
 554   // memory or address inputs. This block may be different than the


 581   // Each of these stores is a possible definition of memory
 582   // that 'load' needs to use.  We need to force 'load'
 583   // to occur before each such store.  When the store is in
 584   // the same block as 'load', we insert an anti-dependence
 585   // edge load->store.
 586 
 587   // The relevant stores "nearby" the load consist of a tree rooted
 588   // at initial_mem, with internal nodes of type MergeMem.
 589   // Therefore, the branches visited by the worklist are of this form:
 590   //    initial_mem -> (MergeMem ->)* store
 591   // The anti-dependence constraints apply only to the fringe of this tree.
 592 
 593   Node* initial_mem = load->in(MemNode::Memory);
 594   worklist_store.push(initial_mem);
 595   worklist_visited.push(initial_mem);
 596   worklist_mem.push(NULL);
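       // The two worklists advance in lockstep: worklist_mem.at(k) records the memory
       // node (initial_mem or a MergeMem) through which worklist_store.at(k) was reached,
       // and the Phi handling below uses that node to select the relevant Phi inputs.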
 597   while (worklist_store.size() > 0) {
 598     // Examine a nearby store to see if it might interfere with our load.
 599     Node* mem   = worklist_mem.pop();
 600     Node* store = worklist_store.pop();
 601     Opcodes op = store->Opcode();
 602 
 603     // MergeMems do not directly have anti-deps.
 604     // Treat them as internal nodes in a forward tree of memory states,
 605     // the leaves of which are each a 'possible-def'.
 606     if (store == initial_mem    // root (exclusive) of tree we are searching
 607         || op == Opcodes::Op_MergeMem    // internal node of tree we are searching
 608         ) {
 609       mem = store;   // It's not a possibly interfering store.
 610       if (store == initial_mem)
 611         initial_mem = NULL;  // only process initial memory once
 612 
 613       for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
 614         store = mem->fast_out(i);
 615         if (store->is_MergeMem()) {
 616           // Be sure we don't get into combinatorial problems.
 617           // (Allow phis to be repeated; they can merge two relevant states.)
 618           uint j = worklist_visited.size();
 619           for (; j > 0; j--) {
 620             if (worklist_visited.at(j-1) == store)  break;
 621           }
 622           if (j > 0)  continue; // already on work list; do not repeat
 623           worklist_visited.push(store);
 624         }
 625         worklist_mem.push(mem);
 626         worklist_store.push(store);
 627       }
 628       continue;
 629     }
 630 
 631     if (op == Opcodes::Op_MachProj || op == Opcodes::Op_Catch)   continue;
 632     if (store->needs_anti_dependence_check())  continue;  // not really a store
 633 
 634     // Compute the alias index.  Loads and stores with different alias
 635     // indices do not need anti-dependence edges.  Wide MemBar's are
 636     // anti-dependent on everything (except immutable memories).
 637     const TypePtr* adr_type = store->adr_type();
 638     if (!C->can_alias(adr_type, load_alias_idx))  continue;
 639 
 640     // Most slow-path runtime calls do NOT modify Java memory, but
 641     // they can block and so write Raw memory.
 642     if (store->is_Mach()) {
 643       MachNode* mstore = store->as_Mach();
 644       if (load_alias_idx != Compile::AliasIdxRaw) {
 645         // Check for call into the runtime using the Java calling
 646         // convention (and from there into a wrapper); it has no
 647         // _method.  Can't do this optimization for Native calls because
 648         // they CAN write to Java memory.
 649         if (mstore->ideal_Opcode() == Opcodes::Op_CallStaticJava) {
 650           assert(mstore->is_MachSafePoint(), "");
 651           MachSafePointNode* ms = (MachSafePointNode*) mstore;
 652           assert(ms->is_MachCallJava(), "");
 653           MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
 654           if (mcj->_method == NULL) {
 655             // These runtime calls do not write to Java visible memory
 656             // (other than Raw) and so do not require anti-dependence edges.
 657             continue;
 658           }
 659         }
 660         // Same for SafePoints: they read/write Raw but only read otherwise.
 661         // This is basically a workaround for SafePoints only defining control
 662         // instead of control + memory.
 663         if (mstore->ideal_Opcode() == Opcodes::Op_SafePoint)
 664           continue;
 665       } else {
 666         // Some raw memory, such as the load of "top" at an allocation,
 667         // can be control dependent on the previous safepoint. See
 668         // comments in GraphKit::allocate_heap() about control input.
 669         // Inserting an anti-dep between such a safepoint and a use
 670         // creates a cycle, and will cause a subsequent failure in
 671         // local scheduling.  (BugId 4919904)
 672         // (%%% How can a control input be a safepoint and not a projection??)
 673         if (mstore->ideal_Opcode() == Opcodes::Op_SafePoint && load->in(0) == mstore)
 674           continue;
 675       }
 676     }
 677 
 678     // Identify a block that the current load must be above,
 679     // or else observe that 'store' is all the way up in the
 680     // earliest legal block for 'load'.  In the latter case,
 681     // immediately insert an anti-dependence edge.
 682     Block* store_block = get_block_for_node(store);
 683     assert(store_block != NULL, "unused killing projections skipped above");
 684 
 685     if (store->is_Phi()) {
 686       // 'load' uses memory which is one (or more) of the Phi's inputs.
 687       // It must be scheduled not before the Phi, but rather before
 688       // each of the relevant Phi inputs.
 689       //
 690       // Instead of finding the LCA of all inputs to a Phi that match 'mem',
 691       // we mark each corresponding predecessor block and do a combined
 692       // hoisting operation later (raise_LCA_above_marks).
 693       //


1210     if (self->is_top()) {
1211       // Top node goes in bb #2 with other constants.
1212       // It must be special-cased, because it has no out edges.
1213       early->add_inst(self);
1214       continue;
1215     }
1216 
1217     // No uses, just terminate
1218     if (self->outcnt() == 0) {
1219       assert(self->is_MachProj(), "sanity");
1220       continue;                   // Must be a dead machine projection
1221     }
1222 
1223     // If node is pinned in the block, then no scheduling can be done.
1224     if( self->pinned() )          // Pinned in block?
1225       continue;
1226 
1227     MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
1228     if (mach) {
1229       switch (mach->ideal_Opcode()) {
1230       case Opcodes::Op_CreateEx:
1231         // Don't move exception creation
1232         early->add_inst(self);
1233         continue;
1234         break;
1235       case Opcodes::Op_CheckCastPP:
1236         // Don't move CheckCastPP nodes away from their input, if the input
1237         // is a rawptr (5071820).
1238         Node *def = self->in(1);
1239         if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
1240           early->add_inst(self);
1241 #ifdef ASSERT
1242           _raw_oops.push(def);
1243 #endif
1244           continue;
1245         }
1246         break;
1247       }
1248     }
1249 
1250     // Gather LCA of all uses
1251     Block *LCA = NULL;
1252     {
1253       for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
1254         // For all uses, find LCA
1255         Node* use = self->fast_out(i);


1280         // Retry with subsume_loads == false
1281         // If this is the first failure, the sentinel string will "stick"
1282         // to the Compile object, and the C2Compiler will see it and retry.
1283         C->record_failure(C2Compiler::retry_no_subsuming_loads());
1284       } else {
1285         // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
1286         C->record_method_not_compilable("late schedule failed: incorrect graph");
1287       }
1288       return;
1289     }
1290 
1291     // If there is no opportunity to hoist, then we're done.
1292     // In stress mode, try to hoist even the single operations.
1293     bool try_to_hoist = StressGCM || (LCA != early);
1294 
1295     // Must clone guys stay next to use; no hoisting allowed.
1296     // Also cannot hoist guys that alter memory or are otherwise not
1297     // allocatable (hoisting can make a value live longer, leading to
1298     // anti and output dependency problems which are normally resolved
1299     // by the register allocator giving everyone a different register).
1300     if (mach != NULL && must_clone[static_cast<uint>(mach->ideal_Opcode())])
1301       try_to_hoist = false;
1302 
1303     Block* late = NULL;
1304     if (try_to_hoist) {
1305       // Now find the block with the least execution frequency.
1306       // Start at the latest schedule and work up to the earliest schedule
1307       // in the dominator tree.  Thus the Node will dominate all its uses.
1308       late = hoist_to_cheaper_block(LCA, early, self);
1309     } else {
1310       // Just use the LCA of the uses.
1311       late = LCA;
1312     }
1313 
1314     // Put the node into target block
1315     schedule_node_into_block(self, late);
1316 
1317 #ifdef ASSERT
1318     if (self->needs_anti_dependence_check()) {
1319       // since precedence edges are only inserted when we're sure they
1320       // are needed make sure that after placement in a block we don't


1803       BlockProbPair bpp(et, new_prob);
1804       _exits.at_put(i, bpp);
1805     }
1806 
1807     // Save the total, but guard against unreasonable probability,
1808     // as the value is used to estimate the loop trip count.
1809     // An infinite trip count would blur relative block
1810     // frequencies.
1811     if (exits_sum > 1.0f) exits_sum = 1.0;
1812     if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
1813     _exit_prob = exits_sum;
1814   }
1815 }
1816 
1817 //------------------------------succ_prob-------------------------------------
1818 // Determine the probability of reaching successor 'i' from the receiver block.
1819 float Block::succ_prob(uint i) {
1820   int eidx = end_idx();
1821   Node *n = get_node(eidx);  // Get ending Node
1822 
1823   Opcodes op = n->Opcode();
1824   if (n->is_Mach()) {
1825     if (n->is_MachNullCheck()) {
1826       // Can only reach here if called after lcm. The original Op_If is gone,
1827       // so we attempt to infer the probability from one or both of the
1828       // successor blocks.
1829       assert(_num_succs == 2, "expecting 2 successors of a null check");
1830       // If either successor has only one predecessor, then the
1831       // probability estimate can be derived using the
1832       // relative frequency of the successor and this block.
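            // (num_preds() counts the head node's inputs, including slot 0, which is
            // not a CFG edge, so a count of 2 means the successor has exactly one real
            // predecessor: this block.)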
1833       if (_succs[i]->num_preds() == 2) {
1834         return _succs[i]->_freq / _freq;
1835       } else if (_succs[1-i]->num_preds() == 2) {
1836         return 1 - (_succs[1-i]->_freq / _freq);
1837       } else {
1838         // Estimate using both successor frequencies
1839         float freq = _succs[i]->_freq;
1840         return freq / (freq + _succs[1-i]->_freq);
1841       }
1842     }
1843     op = n->as_Mach()->ideal_Opcode();
1844   }
1845 
1846 
1847   // Switch on branch type
1848   switch( op ) {
1849   case Opcodes::Op_CountedLoopEnd:
1850   case Opcodes::Op_If: {
1851     assert (i < 2, "just checking");
1852     // Conditionals pass on only part of their frequency
1853     float prob  = n->as_MachIf()->_prob;
1854     assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
1855     // If succ[i] is the FALSE branch, invert path info
1856     if( get_node(i + eidx + 1)->Opcode() == Opcodes::Op_IfFalse ) {
1857       return 1.0f - prob; // not taken
1858     } else {
1859       return prob; // taken
1860     }
1861   }
1862 
1863   case Opcodes::Op_Jump:
1864     // Divide the frequency between all successors evenly
1865     return 1.0f/_num_succs;
1866 
1867   case Opcodes::Op_Catch: {
1868     const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1869     if (ci->_con == CatchProjNode::fall_through_index) {
1870       // Fall-thru path gets the lion's share.
1871       return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
1872     } else {
1873       // Presume exceptional paths are equally unlikely
1874       return PROB_UNLIKELY_MAG(5);
1875     }
1876   }
1877 
1878   case Opcodes::Op_Root:
1879   case Opcodes::Op_Goto:
1880     // Pass frequency straight thru to target
1881     return 1.0f;
1882 
1883   case Opcodes::Op_NeverBranch:
1884     return 0.0f;
1885 
1886   case Opcodes::Op_TailCall:
1887   case Opcodes::Op_TailJump:
1888   case Opcodes::Op_Return:
1889   case Opcodes::Op_Halt:
1890   case Opcodes::Op_Rethrow:
1891     // Do not push out freq to root block
1892     return 0.0f;
1893 
1894   default:
1895     ShouldNotReachHere();
1896   }
1897 
1898   return 0.0f;
1899 }
1900 
1901 //------------------------------num_fall_throughs-----------------------------
1902 // Return the number of fall-through candidates for a block
1903 int Block::num_fall_throughs() {
1904   int eidx = end_idx();
1905   Node *n = get_node(eidx);  // Get ending Node
1906 
1907   Opcodes op = n->Opcode();
1908   if (n->is_Mach()) {
1909     if (n->is_MachNullCheck()) {
 1910       // In theory, either side can fall through; for simplicity's sake,
1911       // let's say only the false branch can now.
1912       return 1;
1913     }
1914     op = n->as_Mach()->ideal_Opcode();
1915   }
1916 
1917   // Switch on branch type
1918   switch( op ) {
1919   case Opcodes::Op_CountedLoopEnd:
1920   case Opcodes::Op_If:
1921     return 2;
1922 
1923   case Opcodes::Op_Root:
1924   case Opcodes::Op_Goto:
1925     return 1;
1926 
1927   case Opcodes::Op_Catch: {
1928     for (uint i = 0; i < _num_succs; i++) {
1929       const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1930       if (ci->_con == CatchProjNode::fall_through_index) {
1931         return 1;
1932       }
1933     }
1934     return 0;
1935   }
1936 
1937   case Opcodes::Op_Jump:
1938   case Opcodes::Op_NeverBranch:
1939   case Opcodes::Op_TailCall:
1940   case Opcodes::Op_TailJump:
1941   case Opcodes::Op_Return:
1942   case Opcodes::Op_Halt:
1943   case Opcodes::Op_Rethrow:
1944     return 0;
1945 
1946   default:
1947     ShouldNotReachHere();
1948   }
1949 
1950   return 0;
1951 }
1952 
1953 //------------------------------succ_fall_through-----------------------------
1954 // Return true if a specific successor could be fall-through target.
1955 bool Block::succ_fall_through(uint i) {
1956   int eidx = end_idx();
1957   Node *n = get_node(eidx);  // Get ending Node
1958 
1959   Opcodes op = n->Opcode();
1960   if (n->is_Mach()) {
1961     if (n->is_MachNullCheck()) {
 1962       // In theory, either side can fall through; for simplicity's sake,
1963       // let's say only the false branch can now.
1964       return get_node(i + eidx + 1)->Opcode() == Opcodes::Op_IfFalse;
1965     }
1966     op = n->as_Mach()->ideal_Opcode();
1967   }
1968 
1969   // Switch on branch type
1970   switch( op ) {
1971   case Opcodes::Op_CountedLoopEnd:
1972   case Opcodes::Op_If:
1973   case Opcodes::Op_Root:
1974   case Opcodes::Op_Goto:
1975     return true;
1976 
1977   case Opcodes::Op_Catch: {
1978     const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
1979     return ci->_con == CatchProjNode::fall_through_index;
1980   }
1981 
1982   case Opcodes::Op_Jump:
1983   case Opcodes::Op_NeverBranch:
1984   case Opcodes::Op_TailCall:
1985   case Opcodes::Op_TailJump:
1986   case Opcodes::Op_Return:
1987   case Opcodes::Op_Halt:
1988   case Opcodes::Op_Rethrow:
1989     return false;
1990 
1991   default:
1992     ShouldNotReachHere();
1993   }
1994 
1995   return false;
1996 }
1997 
1998 //------------------------------update_uncommon_branch------------------------
1999 // Update the probability of a two-branch to be uncommon
2000 void Block::update_uncommon_branch(Block* ub) {
2001   int eidx = end_idx();
2002   Node *n = get_node(eidx);  // Get ending Node
2003 
2004   Opcodes op = n->as_Mach()->ideal_Opcode();
2005 
 2006   assert(op == Opcodes::Op_CountedLoopEnd || op == Opcodes::Op_If, "must be an If");
2007   assert(num_fall_throughs() == 2, "must be a two way branch block");
2008 
2009   // Which successor is ub?
2010   uint s;
2011   for (s = 0; s <_num_succs; s++) {
2012     if (_succs[s] == ub) break;
2013   }
2014   assert(s < 2, "uncommon successor must be found");
2015 
 2016   // If ub is the true path, make the probability small; else
2017   // ub is the false path, and make the probability large
2018   bool invert = (get_node(s + eidx + 1)->Opcode() == Opcodes::Op_IfFalse);
2019 
2020   // Get existing probability
2021   float p = n->as_MachIf()->_prob;
2022 
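        // Convert p to the probability of reaching ub, clamp it to at most PROB_MIN,
        // and then convert back to the taken-branch sense in which _prob is stored.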
2023   if (invert) p = 1.0 - p;
2024   if (p > PROB_MIN) {
2025     p = PROB_MIN;
2026   }
2027   if (invert) p = 1.0 - p;
2028 
2029   n->as_MachIf()->_prob = p;
2030 }
2031 
2032 //------------------------------update_succ_freq-------------------------------
2033 // Update the appropriate frequency associated with block 'b', a successor of
2034 // a block in this loop.
2035 void CFGLoop::update_succ_freq(Block* b, double freq) {
2036   if (b->_loop == this) {
2037     if (b == head()) {
2038       // back branch within the loop

