
src/hotspot/share/opto/macro.cpp

rev 51766 : 8210829: Modularize allocations in C2


1290     transform_later(toobig_iff);
1291     // Plug the failing-too-big test into the slow-path region
1292     Node *toobig_true = new IfTrueNode( toobig_iff );
1293     transform_later(toobig_true);
1294     slow_region    ->init_req( too_big_or_final_path, toobig_true );
1295     toobig_false = new IfFalseNode( toobig_iff );
1296     transform_later(toobig_false);
1297   } else {         // No initial test, just fall into next case
1298     toobig_false = ctrl;
1299     debug_only(slow_region = NodeSentinel);
1300   }
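
At runtime, the dispatch wired up above reduces to a simple size check. A schematic of the emitted control flow (pseudocode only, not the IR):

    // schematic only:
    if (too_big) {          // initial_slow_test, e.g. a huge array length
        goto slow_path;     // toobig_true feeds slow_region and the VM call
    }
    // fall through on toobig_false into the fast allocation path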
1301 
1302   Node *slow_mem = mem;  // save the current memory state for slow path
1303   // generate the fast allocation code unless we know that the initial test will always go slow
1304   if (!always_slow) {
1305     // Fast path modifies only raw memory.
1306     if (mem->is_MergeMem()) {
1307       mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
1308     }
1309 
1310     Node* eden_top_adr;
1311     Node* eden_end_adr;
1312 
1313     set_eden_pointers(eden_top_adr, eden_end_adr);
1314 
1315     // Load Eden::end.  Loop invariant and hoisted.
1316     //
1317     // Note: We set the control input on "eden_end" and "old_eden_top" when using
1318     //       a TLAB to work around a bug where these values were being moved across
1319     //       a safepoint.  These are not oops, so they cannot be included in the oop
1320     //       map, but they can be changed by a GC.  The proper way to fix this would
1321     //       be to set the raw memory state when generating a SafepointNode.  However
1322     //       this will require extensive changes to the loop optimization in order to
1323     //       prevent a degradation of the optimization.
1324     //       See comment in memnode.hpp, around line 227 in class LoadPNode.
1325     Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
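
The pinned control input deserves a concrete illustration; without it, the load could legally be scheduled like this (a schematic of the hazard, not real code):

    // hazard schematic, assuming the load had no control input:
    //   limit = *eden_end_adr;      // hoisted above a safepoint
    //   <safepoint>                 // a GC here may change eden's end
    //   if (new_top >= limit) ...   // compares against a stale limit
    // Setting the control input to ctrl keeps the load below the safepoint.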
1326 
1327     // allocate the Region and Phi nodes for the result
1328     result_region = new RegionNode(3);
1329     result_phi_rawmem = new PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1330     result_phi_rawoop = new PhiNode(result_region, TypeRawPtr::BOTTOM);
1331     result_phi_i_o    = new PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
1332 
1333     // We need a Region for the loop-back contended case.
1334     enum { fall_in_path = 1, contended_loopback_path = 2 };
1335     Node *contended_region;
1336     Node *contended_phi_rawmem;
1337     if (UseTLAB) {
1338       contended_region = toobig_false;
1339       contended_phi_rawmem = mem;
1340     } else {
1341       contended_region = new RegionNode(3);
1342       contended_phi_rawmem = new PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1343       // Now handle the not-too-big case: we fall into the contended
1344       // loop-back merge point.
1345       contended_region    ->init_req(fall_in_path, toobig_false);
1346       contended_phi_rawmem->init_req(fall_in_path, mem);
1347       transform_later(contended_region);
1348       transform_later(contended_phi_rawmem);
1349     }
1350 
1351     // Load(-locked) the heap top.
1352     // See note above concerning the control input when using a TLAB
1353     Node *old_eden_top = UseTLAB
1354       ? new LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered)
1355       : new LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, MemNode::acquire);
1356 
1357     transform_later(old_eden_top);
1358     // Add to heap top to get a new heap top
1359     Node *new_eden_top = new AddPNode(top(), old_eden_top, size_in_bytes);
1360     transform_later(new_eden_top);
1361     // Check for needing a GC; compare against heap end
1362     Node *needgc_cmp = new CmpPNode(new_eden_top, eden_end);
1363     transform_later(needgc_cmp);
1364     Node *needgc_bol = new BoolNode(needgc_cmp, BoolTest::ge);
1365     transform_later(needgc_bol);
1366     IfNode *needgc_iff = new IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
1367     transform_later(needgc_iff);
1368 
1369     // Plug the failing-heap-space-need-gc test into the slow-path region
1370     Node *needgc_true = new IfTrueNode(needgc_iff);
1371     transform_later(needgc_true);
1372     if (initial_slow_test) {
1373       slow_region->init_req(need_gc_path, needgc_true);
1374       // This completes all paths into the slow merge point
1375       transform_later(slow_region);
1376     } else {                      // No initial slow path needed!
1377       // Just fall from the need-GC path straight into the VM call.
1378       slow_region = needgc_true;
1379     }
1380     // No need for a GC.  Setup for the Store-Conditional
1381     Node *needgc_false = new IfFalseNode(needgc_iff);
1382     transform_later(needgc_false);
1383 
1384     // Grab regular I/O before optional prefetch may change it.
1385     // Slow-path does no I/O so just set it to the original I/O.
1386     result_phi_i_o->init_req(slow_result_path, i_o);
1387 
1388     i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
1389                               old_eden_top, new_eden_top, length);
1390 
1391     // Name successful fast-path variables
1392     Node* fast_oop = old_eden_top;
1393     Node* fast_oop_ctrl;
1394     Node* fast_oop_rawmem;
1395 
1396     // Store (-conditional) the modified eden top back down.
1397     // StorePConditional produces flags for a test PLUS a modified raw
1398     // memory state.
1399     if (UseTLAB) {
1400       Node* store_eden_top =
1401         new StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
1402                               TypeRawPtr::BOTTOM, new_eden_top, MemNode::unordered);
1403       transform_later(store_eden_top);
1404       fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
1405       fast_oop_rawmem = store_eden_top;
1406     } else {
1407       Node* store_eden_top =
1408         new StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
1409                                          new_eden_top, fast_oop/*old_eden_top*/);
1410       transform_later(store_eden_top);
1411       Node *contention_check = new BoolNode(store_eden_top, BoolTest::ne);
1412       transform_later(contention_check);
1413       store_eden_top = new SCMemProjNode(store_eden_top);
1414       transform_later(store_eden_top);
1415 
1416       // If not using TLABs, check to see if there was contention.
1417       IfNode *contention_iff = new IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
1418       transform_later(contention_iff);
1419       Node *contention_true = new IfTrueNode(contention_iff);
1420       transform_later(contention_true);
1421       // If contention, loopback and try again.
1422       contended_region->init_req(contended_loopback_path, contention_true);
1423       contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
1424 
1425       // Fast-path succeeded with no contention!
1426       Node *contention_false = new IfFalseNode(contention_iff);
1427       transform_later(contention_false);
1428       fast_oop_ctrl = contention_false;
1429 
1430       // Bump total allocated bytes for this thread
1431       Node* thread = new ThreadLocalNode();
1432       transform_later(thread);
1433       Node* alloc_bytes_adr = basic_plus_adr(top()/*not oop*/, thread,
1434                                              in_bytes(JavaThread::allocated_bytes_offset()));
1435       Node* alloc_bytes = make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
1436                                     0, TypeLong::LONG, T_LONG);
1437 #ifdef _LP64
1438       Node* alloc_size = size_in_bytes;
1439 #else
1440       Node* alloc_size = new ConvI2LNode(size_in_bytes);
1441       transform_later(alloc_size);
1442 #endif
1443       Node* new_alloc_bytes = new AddLNode(alloc_bytes, alloc_size);
1444       transform_later(new_alloc_bytes);
1445       fast_oop_rawmem = make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
1446                                    0, new_alloc_bytes, T_LONG);
1447     }
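
Taken together, the nodes above encode a classic bump-pointer allocation. At runtime the fast path behaves roughly like the sketch below (hypothetical helpers such as compare_and_set; with a TLAB the store is unconditional, never fails, and the loop disappears):

    // sketch only; eden_top_adr / eden_end_adr are the pointers set up above
    char* allocate_fast(size_t size_in_bytes) {
      for (;;) {                                    // contended loop-back (non-TLAB)
        char* old_top = *eden_top_adr;              // load(-locked) the heap top
        char* new_top = old_top + size_in_bytes;
        if (new_top >= *eden_end_adr) {
          return NULL;                              // needs GC: take the slow path
        }
        if (compare_and_set(eden_top_adr, old_top, new_top)) {  // store-conditional
          thread->allocated_bytes += size_in_bytes; // per-thread accounting
          return old_top;                           // the freshly carved object
        }
        // contention: another thread bumped the top first; loop back and retry
      }
    }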
1448 
1449     InitializeNode* init = alloc->initialization();
1450     fast_oop_rawmem = initialize_object(alloc,
1451                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
1452                                         klass_node, length, size_in_bytes);
1453 
1454     // If initialization is performed by an array copy, any required
1455     // MemBarStoreStore was already added. If the object does not
1456     // escape no need for a MemBarStoreStore. If the object does not
1457     // escape in its initializer and memory barrier (MemBarStoreStore or
1458     // stronger) is already added at exit of initializer, also no need
1459     // for a MemBarStoreStore. Otherwise we need a MemBarStoreStore
1460     // so that stores that initialize this object can't be reordered
1461     // with a subsequent store that makes this object accessible by
1462     // other threads.
1463     // Other threads include Java threads and JVM internal threads
1464     // (for example concurrent GC threads). The current concurrent GC
1465     // implementations, CMS and G1, will not scan a newly created object,
1466     // so it's safe to skip the storestore barrier when allocation does


1757     if (!init->is_complete()) {
1758       // Try to win by zeroing only what the init does not store.
1759       // We can also try to do some peephole optimizations,
1760       // such as combining some adjacent subword stores.
1761       rawmem = init->complete_stores(control, rawmem, object,
1762                                      header_size, size_in_bytes, &_igvn);
1763     }
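
complete_stores zeroes only the slices that the captured initializing stores leave untouched. A minimal illustration of the idea, with hypothetical offsets:

    // object of size 32 bytes, header occupying [0, 16); the initializer was
    // observed to store only [16, 24), so just the gap remains to be cleared:
    //   zero_memory(object + 24, 32 - 24);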
1764     // We have no more use for this link, since the AllocateNode goes away:
1765     init->set_req(InitializeNode::RawAddress, top());
1766     // (If we keep the link, it just confuses the register allocator,
1767     // who thinks he sees a real use of the address by the membar.)
1768   }
1769 
1770   return rawmem;
1771 }
1772 
1773 // Generate prefetch instructions for next allocations.
1774 Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
1775                                         Node*& contended_phi_rawmem,
1776                                         Node* old_eden_top, Node* new_eden_top,
1777                                         Node* length) {
1778    enum { fall_in_path = 1, pf_path = 2 };
1779    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
1780       // Generate prefetch allocation with watermark check.
1781       // As an allocation hits the watermark, we will prefetch starting
1782       // at a "distance" away from watermark.
1783 
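At runtime the watermark scheme amounts to the following (a sketch; the comparison of the new top against the watermark sits in the lines elided from this hunk):

    // AllocatePrefetchStyle == 2, schematic:
    if (new_eden_top >= thread->tlab_pf_top) {        // crossed the watermark?
      char* wm = thread->tlab_pf_top + AllocatePrefetchDistance;
      thread->tlab_pf_top = wm;                       // advance the watermark
      for (uint i = 0; i < lines; i++) {
        prefetch(wm + i * AllocatePrefetchStepSize);  // prefetch beyond it
      }
    }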
1784       Node *pf_region = new RegionNode(3);
1785       Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
1786                                                 TypeRawPtr::BOTTOM );
1787       // I/O is used for Prefetch
1788       Node *pf_phi_abio = new PhiNode( pf_region, Type::ABIO );
1789 
1790       Node *thread = new ThreadLocalNode();
1791       transform_later(thread);
1792 
1793       Node *eden_pf_adr = new AddPNode( top()/*not oop*/, thread,
1794                    _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
1795       transform_later(eden_pf_adr);
1796 
1797       Node *old_pf_wm = new LoadPNode(needgc_false,


1815 
1816       Node *need_pf_false = new IfFalseNode( need_pf_iff );
1817       transform_later(need_pf_false);
1818 
1819       Node *new_pf_wmt = new AddPNode( top(), old_pf_wm,
1820                                     _igvn.MakeConX(AllocatePrefetchDistance) );
1821       transform_later(new_pf_wmt );
1822       new_pf_wmt->set_req(0, need_pf_true);
1823 
1824       Node *store_new_wmt = new StorePNode(need_pf_true,
1825                                        contended_phi_rawmem, eden_pf_adr,
1826                                        TypeRawPtr::BOTTOM, new_pf_wmt,
1827                                        MemNode::unordered);
1828       transform_later(store_new_wmt);
1829 
1830       // adding prefetches
1831       pf_phi_abio->init_req( fall_in_path, i_o );
1832 
1833       Node *prefetch_adr;
1834       Node *prefetch;
1835       uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
1836       uint step_size = AllocatePrefetchStepSize;
1837       uint distance = 0;
1838 
1839       for ( uint i = 0; i < lines; i++ ) {
1840         prefetch_adr = new AddPNode( old_pf_wm, new_pf_wmt,
1841                                             _igvn.MakeConX(distance) );
1842         transform_later(prefetch_adr);
1843         prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
1844         transform_later(prefetch);
1845         distance += step_size;
1846         i_o = prefetch;
1847       }
1848       pf_phi_abio->set_req( pf_path, i_o );
1849 
1850       pf_region->init_req( fall_in_path, need_pf_false );
1851       pf_region->init_req( pf_path, need_pf_true );
1852 
1853       pf_phi_rawmem->init_req( fall_in_path, contended_phi_rawmem );
1854       pf_phi_rawmem->init_req( pf_path, store_new_wmt );
1855 
1856       transform_later(pf_region);
1857       transform_later(pf_phi_rawmem);
1858       transform_later(pf_phi_abio);
1859 
1860       needgc_false = pf_region;
1861       contended_phi_rawmem = pf_phi_rawmem;
1862       i_o = pf_phi_abio;
1863    } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
1864       // Insert a prefetch instruction for each allocation.
1865       // This code is used to generate 1 prefetch instruction per cache line.
1866 
1867       // Generate several prefetch instructions.
1868       uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
1869       uint step_size = AllocatePrefetchStepSize;
1870       uint distance = AllocatePrefetchDistance;
1871 
1872       // Next cache address.
1873       Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
1874                                      _igvn.MakeConX(step_size + distance));
1875       transform_later(cache_adr);
1876       cache_adr = new CastP2XNode(needgc_false, cache_adr);
1877       transform_later(cache_adr);
1878       // The address is aligned down to the beginning of the cache line
1879       // (this matters when the BIS instruction is used as the prefetch on SPARC).
1880       Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
1881       cache_adr = new AndXNode(cache_adr, mask);
1882       transform_later(cache_adr);
1883       cache_adr = new CastX2PNode(cache_adr);
1884       transform_later(cache_adr);
1885 
1886       // Prefetch
1887       Node *prefetch = new PrefetchAllocationNode( contended_phi_rawmem, cache_adr );
1888       prefetch->set_req(0, needgc_false);
1889       transform_later(prefetch);
1890       contended_phi_rawmem = prefetch;
1891       Node *prefetch_adr;
1892       distance = step_size;
1893       for ( uint i = 1; i < lines; i++ ) {
1894         prefetch_adr = new AddPNode( cache_adr, cache_adr,
1895                                             _igvn.MakeConX(distance) );
1896         transform_later(prefetch_adr);
1897         prefetch = new PrefetchAllocationNode( contended_phi_rawmem, prefetch_adr );
1898         transform_later(prefetch);
1899         distance += step_size;
1900         contended_phi_rawmem = prefetch;
1901       }
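
Style 3 thus issues exactly one prefetch per cache line ahead of the old top, starting from a line-aligned address; schematically (align_down is a hypothetical helper standing in for the mask-and-cast sequence above):

    // AllocatePrefetchStyle == 3, schematic:
    char* cache = align_down(old_eden_top + step_size + distance, step_size);
    for (uint i = 0; i < lines; i++) {
      prefetch(cache + i * step_size);                // one line per iteration
    }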
1902    } else if( AllocatePrefetchStyle > 0 ) {
1903       // Insert a prefetch for each allocation only on the fast-path
1904       Node *prefetch_adr;
1905       Node *prefetch;
1906       // Generate several prefetch instructions.
1907       uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
1908       uint step_size = AllocatePrefetchStepSize;
1909       uint distance = AllocatePrefetchDistance;
1910       for ( uint i = 0; i < lines; i++ ) {
1911         prefetch_adr = new AddPNode( old_eden_top, new_eden_top,
1912                                             _igvn.MakeConX(distance) );
1913         transform_later(prefetch_adr);
1914         prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
1915         // Do not let it float too high, since if eden_top == eden_end,
1916         // both might be null.
1917         if( i == 0 ) { // Set control for first prefetch, next follows it
1918           prefetch->init_req(0, needgc_false);
1919         }
1920         transform_later(prefetch);
1921         distance += step_size;
1922         i_o = prefetch;
1923       }
1924    }
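
The simple style (AllocatePrefetchStyle == 1) reduces at runtime to prefetching a fixed window ahead of the freshly bumped top (schematic):

    for (uint i = 0; i < lines; i++) {
      prefetch(new_eden_top + AllocatePrefetchDistance + i * AllocatePrefetchStepSize);
    }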
1925    return i_o;
1926 }
1927 




1290     transform_later(toobig_iff);
1291     // Plug the failing-too-big test into the slow-path region
1292     Node *toobig_true = new IfTrueNode( toobig_iff );
1293     transform_later(toobig_true);
1294     slow_region    ->init_req( too_big_or_final_path, toobig_true );
1295     toobig_false = new IfFalseNode( toobig_iff );
1296     transform_later(toobig_false);
1297   } else {         // No initial test, just fall into next case
1298     toobig_false = ctrl;
1299     debug_only(slow_region = NodeSentinel);
1300   }
1301 
1302   Node *slow_mem = mem;  // save the current memory state for slow path
1303   // generate the fast allocation code unless we know that the initial test will always go slow
1304   if (!always_slow) {
1305     // Fast path modifies only raw memory.
1306     if (mem->is_MergeMem()) {
1307       mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
1308     }
1309 

















1310     // allocate the Region and Phi nodes for the result
1311     result_region = new RegionNode(3);
1312     result_phi_rawmem = new PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1313     result_phi_rawoop = new PhiNode(result_region, TypeRawPtr::BOTTOM);
1314     result_phi_i_o    = new PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
1315 



















































1316     // Grab regular I/O before optional prefetch may change it.
1317     // Slow-path does no I/O so just set it to the original I/O.
1318     result_phi_i_o->init_req(slow_result_path, i_o);
1319 
1320     Node* needgc_ctrl = NULL;


1321     // Name successful fast-path variables

1322     Node* fast_oop_ctrl;
1323     Node* fast_oop_rawmem;
1324 
1325     intx prefetch_lines = length != NULL ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
































1326 
1327     BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
1328     Node* fast_oop = bs->obj_allocate(this, ctrl, mem, toobig_false, size_in_bytes, i_o, needgc_ctrl,
1329                                       fast_oop_ctrl, fast_oop_rawmem,
1330                                       prefetch_lines);
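
This call is the core of the modularization: the GC-specific fast-path subgraph is now emitted behind BarrierSetC2. The declaration is not part of this hunk, so the shape below is reconstructed from the call site; i_o, needgc_ctrl, fast_oop_ctrl and fast_oop_rawmem are out-parameters the barrier set fills in while building the fast path:

    // reconstructed from the call site above, not quoted from barrierSetC2.hpp:
    virtual Node* obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem,
                               Node* toobig_false, Node* size_in_bytes,
                               Node*& i_o, Node*& needgc_ctrl,
                               Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                               intx prefetch_lines);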
1331 
1332     if (initial_slow_test) {
1333       slow_region->init_req(need_gc_path, needgc_ctrl);
1334       // This completes all paths into the slow merge point
1335       transform_later(slow_region);
1336     } else {                      // No initial slow path needed!
1337       // Just fall from the need-GC path straight into the VM call.
1338       slow_region = needgc_ctrl;





1339     }
1340 
1341     InitializeNode* init = alloc->initialization();
1342     fast_oop_rawmem = initialize_object(alloc,
1343                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
1344                                         klass_node, length, size_in_bytes);
1345 
1346     // If initialization is performed by an array copy, any required
1347     // MemBarStoreStore was already added. If the object does not
1348     // escape no need for a MemBarStoreStore. If the object does not
1349     // escape in its initializer and memory barrier (MemBarStoreStore or
1350     // stronger) is already added at exit of initializer, also no need
1351     // for a MemBarStoreStore. Otherwise we need a MemBarStoreStore
1352     // so that stores that initialize this object can't be reordered
1353     // with a subsequent store that makes this object accessible by
1354     // other threads.
1355     // Other threads include Java threads and JVM internal threads
1356     // (for example concurrent GC threads). The current concurrent GC
1357     // implementations, CMS and G1, will not scan a newly created object,
1358     // so it's safe to skip the storestore barrier when allocation does


1649     if (!init->is_complete()) {
1650       // Try to win by zeroing only what the init does not store.
1651       // We can also try to do some peephole optimizations,
1652       // such as combining some adjacent subword stores.
1653       rawmem = init->complete_stores(control, rawmem, object,
1654                                      header_size, size_in_bytes, &_igvn);
1655     }
1656     // We have no more use for this link, since the AllocateNode goes away:
1657     init->set_req(InitializeNode::RawAddress, top());
1658     // (If we keep the link, it just confuses the register allocator,
1659     // who thinks he sees a real use of the address by the membar.)
1660   }
1661 
1662   return rawmem;
1663 }
1664 
1665 // Generate prefetch instructions for next allocations.
1666 Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
1667                                         Node*& contended_phi_rawmem,
1668                                         Node* old_eden_top, Node* new_eden_top,
1669                                         intx lines) {
1670    enum { fall_in_path = 1, pf_path = 2 };
1671    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
1672       // Generate prefetch allocation with watermark check.
1673       // As an allocation hits the watermark, we will prefetch starting
1674       // at a "distance" away from watermark.
1675 
1676       Node *pf_region = new RegionNode(3);
1677       Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
1678                                                 TypeRawPtr::BOTTOM );
1679       // I/O is used for Prefetch
1680       Node *pf_phi_abio = new PhiNode( pf_region, Type::ABIO );
1681 
1682       Node *thread = new ThreadLocalNode();
1683       transform_later(thread);
1684 
1685       Node *eden_pf_adr = new AddPNode( top()/*not oop*/, thread,
1686                    _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
1687       transform_later(eden_pf_adr);
1688 
1689       Node *old_pf_wm = new LoadPNode(needgc_false,


1707 
1708       Node *need_pf_false = new IfFalseNode( need_pf_iff );
1709       transform_later(need_pf_false);
1710 
1711       Node *new_pf_wmt = new AddPNode( top(), old_pf_wm,
1712                                     _igvn.MakeConX(AllocatePrefetchDistance) );
1713       transform_later(new_pf_wmt );
1714       new_pf_wmt->set_req(0, need_pf_true);
1715 
1716       Node *store_new_wmt = new StorePNode(need_pf_true,
1717                                        contended_phi_rawmem, eden_pf_adr,
1718                                        TypeRawPtr::BOTTOM, new_pf_wmt,
1719                                        MemNode::unordered);
1720       transform_later(store_new_wmt);
1721 
1722       // adding prefetches
1723       pf_phi_abio->init_req( fall_in_path, i_o );
1724 
1725       Node *prefetch_adr;
1726       Node *prefetch;

1727       uint step_size = AllocatePrefetchStepSize;
1728       uint distance = 0;
1729 
1730       for ( uint i = 0; i < lines; i++ ) {
1731         prefetch_adr = new AddPNode( old_pf_wm, new_pf_wmt,
1732                                             _igvn.MakeConX(distance) );
1733         transform_later(prefetch_adr);
1734         prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
1735         transform_later(prefetch);
1736         distance += step_size;
1737         i_o = prefetch;
1738       }
1739       pf_phi_abio->set_req( pf_path, i_o );
1740 
1741       pf_region->init_req( fall_in_path, need_pf_false );
1742       pf_region->init_req( pf_path, need_pf_true );
1743 
1744       pf_phi_rawmem->init_req( fall_in_path, contended_phi_rawmem );
1745       pf_phi_rawmem->init_req( pf_path, store_new_wmt );
1746 
1747       transform_later(pf_region);
1748       transform_later(pf_phi_rawmem);
1749       transform_later(pf_phi_abio);
1750 
1751       needgc_false = pf_region;
1752       contended_phi_rawmem = pf_phi_rawmem;
1753       i_o = pf_phi_abio;
1754    } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
1755       // Insert a prefetch instruction for each allocation.
1756       // This code is used to generate 1 prefetch instruction per cache line.
1757 
1758       // Generate several prefetch instructions.

1759       uint step_size = AllocatePrefetchStepSize;
1760       uint distance = AllocatePrefetchDistance;
1761 
1762       // Next cache address.
1763       Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
1764                                      _igvn.MakeConX(step_size + distance));
1765       transform_later(cache_adr);
1766       cache_adr = new CastP2XNode(needgc_false, cache_adr);
1767       transform_later(cache_adr);
1768       // The address is aligned down to the beginning of the cache line
1769       // (this matters when the BIS instruction is used as the prefetch on SPARC).
1770       Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
1771       cache_adr = new AndXNode(cache_adr, mask);
1772       transform_later(cache_adr);
1773       cache_adr = new CastX2PNode(cache_adr);
1774       transform_later(cache_adr);
1775 
1776       // Prefetch
1777       Node *prefetch = new PrefetchAllocationNode( contended_phi_rawmem, cache_adr );
1778       prefetch->set_req(0, needgc_false);
1779       transform_later(prefetch);
1780       contended_phi_rawmem = prefetch;
1781       Node *prefetch_adr;
1782       distance = step_size;
1783       for ( uint i = 1; i < lines; i++ ) {
1784         prefetch_adr = new AddPNode( cache_adr, cache_adr,
1785                                             _igvn.MakeConX(distance) );
1786         transform_later(prefetch_adr);
1787         prefetch = new PrefetchAllocationNode( contended_phi_rawmem, prefetch_adr );
1788         transform_later(prefetch);
1789         distance += step_size;
1790         contended_phi_rawmem = prefetch;
1791       }
1792    } else if( AllocatePrefetchStyle > 0 ) {
1793       // Insert a prefetch for each allocation only on the fast-path
1794       Node *prefetch_adr;
1795       Node *prefetch;
1796       // Generate several prefetch instructions.

1797       uint step_size = AllocatePrefetchStepSize;
1798       uint distance = AllocatePrefetchDistance;
1799       for ( uint i = 0; i < lines; i++ ) {
1800         prefetch_adr = new AddPNode( old_eden_top, new_eden_top,
1801                                             _igvn.MakeConX(distance) );
1802         transform_later(prefetch_adr);
1803         prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
1804         // Do not let it float too high, since if eden_top == eden_end,
1805         // both might be null.
1806         if( i == 0 ) { // Set control for first prefetch, next follows it
1807           prefetch->init_req(0, needgc_false);
1808         }
1809         transform_later(prefetch);
1810         distance += step_size;
1811         i_o = prefetch;
1812       }
1813    }
1814    return i_o;
1815 }
1816 

