  // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
  // Replace with a 1-in-10 exit guess.
  void adjust_loop_exit_prob( PhaseIdealLoop *phase );

  // Return TRUE or FALSE if the loop should never be RCE'd or aligned.
  // Useful for unrolling loops with NO array accesses.
  bool policy_peel_only( PhaseIdealLoop *phase ) const;

  // Return TRUE or FALSE if the loop should be unswitched -- clone
  // loop with an invariant test.
  bool policy_unswitching( PhaseIdealLoop *phase ) const;

  // Micro-benchmark spamming. Remove empty loops.
  bool do_remove_empty_loop( PhaseIdealLoop *phase );

  // Convert a one-iteration loop into normal code.
  bool do_one_iteration_loop( PhaseIdealLoop *phase );

  // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
  // move some loop-invariant test (usually a null-check) before the loop.
  bool policy_peeling(PhaseIdealLoop *phase);

  uint estimate_peeling(PhaseIdealLoop *phase);

  // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any
  // known trip count in the counted loop node.
  bool policy_maximally_unroll(PhaseIdealLoop *phase) const;

  // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll
  // if the loop is a counted loop and the loop body is small enough.
  bool policy_unroll(PhaseIdealLoop *phase);

  // Loop analyses to map to a maximal superword unrolling for vectorization.
  void policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct);

  // Return TRUE or FALSE if the loop should be range-check-eliminated.
  // Gather a list of IF tests that are dominated by iteration splitting;
  // also gather the end of the first split and the start of the 2nd split.
  bool policy_range_check( PhaseIdealLoop *phase ) const;

  // Return TRUE or FALSE if the loop should be cache-line aligned.
  // Gather the expression that does the alignment. Note that only
  // one array base can be aligned in a loop (unless the VM guarantees
  // mutual alignment). Note that if we vectorize short memory ops
  // into longer memory ops, we may want to increase alignment.
  bool policy_align( PhaseIdealLoop *phase ) const;

  // Return TRUE if "iff" is a range check.
  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;

  // Estimate the number of nodes required when cloning a loop (body).
  uint est_loop_clone_sz(uint factor) const;

  // Compute loop trip count if possible.
  void compute_trip_count(PhaseIdealLoop* phase);

  // Compute loop trip count from profile data.
  float compute_profile_trip_cnt_helper(Node* n);
  void compute_profile_trip_cnt( PhaseIdealLoop *phase );

  // Reassociate invariant expressions.
  void reassociate_invariants(PhaseIdealLoop *phase);
  // Reassociate invariant add and subtract expressions.
  Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase);
  // Return the nonzero index of the invariant operand if an invariant and a
  // variant are combined with an Add or Sub. Helper for reassociate_invariants.
  int is_invariant_addition(Node* n, PhaseIdealLoop *phase);
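  // As a sketch of what reassociation enables (illustrative, not an exact
  // transcript of the implementation): with inv1 and inv2 loop-invariant and
  // x loop-variant, an expression such as
  //
  //   inv1 + (x + inv2)
  //
  // can be reassociated into
  //
  //   (inv1 + inv2) + x
  //
  // so that the invariant subexpression (inv1 + inv2) can be computed once,
  // outside the loop, leaving a single variant addition in the loop body.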
  // Return true if n is invariant.
  bool is_invariant(Node* n) const;

  // Put loop body on igvn work list.
  void record_for_igvn();

  // ... (intervening declarations elided) ...

  void sink_use( Node *use, Node *post_loop );
  Node *place_near_use( Node *useblock ) const;
  Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
  void try_move_store_after_loop(Node* n);
  bool identical_backtoback_ifs(Node *n);
  bool can_split_if(Node *n_ctrl);

  // Determine if a method is too big for a/another round of split-if, based on
  // a magic (approximate) ratio derived from the equally magic constant 35000,
  // previously used for this purpose (but without relating to the node limit).
  bool must_throttle_split_if() {
    uint threshold = C->max_node_limit() * 2 / 5;
    return C->live_nodes() > threshold;
  }

  // A simplistic node request tracking mechanism, where
  //   = UINT_MAX   Request not valid or made final.
  //   < UINT_MAX   Nodes currently requested (estimate).
  uint _nodes_required;

  enum { REQUIRE_MIN = 70 };

  uint nodes_required() const { return _nodes_required; }

  // Given the _currently_ available number of nodes, check whether there is
  // "room" for an additional request or not, considering the already required
  // number of nodes. Return TRUE if the new request would exceed the node
  // budget limit, otherwise return FALSE. Note that this interpretation is
  // pessimistic about additional requests when new nodes have already been
  // generated since the 'begin'. This behaviour fits with the intention that
  // node estimates/requests should be made up front.
  bool exceeding_node_budget(uint required = 0) {
    assert(C->live_nodes() < C->max_node_limit(), "sanity");
    uint available = C->max_node_limit() - C->live_nodes();
    return available < required + _nodes_required;
  }

  uint require_nodes(uint require, uint minreq = REQUIRE_MIN) {
    precond(require > 0);
    _nodes_required += MAX2(require, minreq);
    return _nodes_required;
  }

  bool may_require_nodes(uint require, uint minreq = REQUIRE_MIN) {
    return !exceeding_node_budget(require) && require_nodes(require, minreq) > 0;
  }

  uint require_nodes_begin() {
    assert(_nodes_required == UINT_MAX, "Bad state (begin).");
    _nodes_required = 0;
    return C->live_nodes();
  }

  // When a node request is final, optionally check that the requested number
  // of nodes was reasonably correct with respect to the number of new nodes
  // introduced since the last 'begin'. Always check that we have not exceeded
  // the maximum node limit.
  void require_nodes_final(uint live_at_begin, bool check_estimate) {
    assert(_nodes_required < UINT_MAX, "Bad state (final).");

    if (check_estimate) {
      // Assert that the node budget request was not off by too much (x2).
      // Should this be the case we _surely_ need to improve the estimates
      // used in our budget calculations.
      assert(C->live_nodes() - live_at_begin <= 2 * _nodes_required,
             "Bad node estimate: actual = %d >> request = %d",
             C->live_nodes() - live_at_begin, _nodes_required);
    }
    // Assert that we have stayed within the node budget limit.
    assert(C->live_nodes() < C->max_node_limit(),
           "Exceeding node budget limit: %d + %d > %d (request = %d)",
           C->live_nodes() - live_at_begin, live_at_begin,
           C->max_node_limit(), _nodes_required);

    _nodes_required = UINT_MAX;
  }
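  // A hypothetical usage sketch of the protocol above (names taken from this
  // class; 'loop' and the transform are assumed for illustration only):
  //
  //   uint live_at_begin = require_nodes_begin();   // open a budget window
  //   if (may_require_nodes(loop->est_loop_clone_sz(2))) {
  //     // ... perform a transform that roughly doubles the loop body ...
  //   }
  //   require_nodes_final(live_at_begin, true);     // close and verify
  //
  // In practice the begin/final pairing is handled by the RAII helper
  // AutoNodeBudget declared below.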
  bool _created_loop_node;

 public:
  void set_created_loop_node() { _created_loop_node = true; }
  bool created_loop_node() { return _created_loop_node; }
  void register_new_node( Node *n, Node *blk );

#ifdef ASSERT
  void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
#endif

#ifndef PRODUCT
  void dump( ) const;
  void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
  void verify() const;          // Major slow  :-)
  void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
  IdealLoopTree *get_loop_idx(Node* n) const {
    // Dead nodes have no loop, so return the top level loop instead.
    return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
  }
  // Print some stats.
  static void print_statistics();
  static int _loop_invokes;     // Count of PhaseIdealLoop invokes
  static int _loop_work;        // Sum of PhaseIdealLoop x _unique
#endif
  void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
};


class AutoNodeBudget : public StackObj
{
 public:
  enum budget_check_t { BUDGET_CHECK, NO_BUDGET_CHECK };

  AutoNodeBudget(PhaseIdealLoop* phase, budget_check_t chk = BUDGET_CHECK)
    : _phase(phase),
      _check_at_final(chk == BUDGET_CHECK),
      _nodes_at_begin(0)
  {
    precond(_phase != NULL);

    _nodes_at_begin = _phase->require_nodes_begin();
  }

  ~AutoNodeBudget() {
#ifndef PRODUCT
    if (TraceLoopOpts) {
      uint request = _phase->nodes_required();
      uint delta   = _phase->C->live_nodes() - _nodes_at_begin;

      if (request < delta) {
        tty->print_cr("Exceeding node budget: %d < %d", request, delta);
      } else {
        uint const REQUIRE_MIN = PhaseIdealLoop::REQUIRE_MIN;
        // Identify the worst estimates as "poor" ones.
        if (request > REQUIRE_MIN && delta > 0) {
          if ((delta >  REQUIRE_MIN && request >  3 * delta) ||
              (delta <= REQUIRE_MIN && request > 10 * delta)) {
            tty->print_cr("Poor node estimate: %d >> %d", request, delta);
          }
        }
      }
    }
#endif // PRODUCT
    _phase->require_nodes_final(_nodes_at_begin, _check_at_final);
  }

 private:
  PhaseIdealLoop* _phase;
  bool _check_at_final;
  uint _nodes_at_begin;
};
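// A hypothetical usage sketch of AutoNodeBudget (the transform routine is
// assumed for illustration): the constructor opens the budget window and the
// destructor runs require_nodes_final() when the scope is left.
//
//   void PhaseIdealLoop::some_transform(IdealLoopTree* loop) {
//     AutoNodeBudget node_budget(this);
//     if (may_require_nodes(loop->est_loop_clone_sz(2))) {
//       // ... clone and rewire the loop body ...
//     }
//   } // ~AutoNodeBudget checks the estimate and the node limit
//
// Passing AutoNodeBudget::NO_BUDGET_CHECK skips the estimate check in the
// destructor while still closing the budget window.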
// This kit may be used for making a reserved copy (clone) of a loop before the
// loop undergoes irreversible changes.
//
// The function create_reserve() creates the reserved copy of the loop by
// calling PhaseIdealLoop::create_reserve_version_of_loop - see there how
// the original and reserved loops are connected in the outer graph.
// If create_reserve() succeeds, it returns 'true' and _has_reserved is set to 'true'.
//
// By default the reserved copy (clone) of the loop is created as dead code - it is
// dominated in the outer loop by this node chain:
//   intcon(1)->If->IfFalse->reserved_copy.
// The original loop is dominated by the same node chain, but via the IfTrue
// projection:
//   intcon(0)->If->IfTrue->original_loop.
//
// In this implementation of CountedLoopReserveKit, the ctor includes
// create_reserve() and the dtor checks the _use_new value.
// If _use_new == false, the dtor "switches" control to the reserved copy of
// the loop.
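// A hypothetical usage sketch (the ctor arguments, use_new() and the transform
// routine are assumptions based on the semantics described above, not taken
// from this excerpt): leaving the scope without committing switches control to
// the pristine reserved copy, discarding the partial changes.
//
//   {
//     CountedLoopReserveKit reserve(phase, loop, true /*active*/);
//     if (do_risky_transform(loop)) {
//       reserve.use_new();   // commit: keep the transformed original loop
//     }
//   } // dtor: without the commit, control moves to the reserved copy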