src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 7029152 Sdiff src/share/vm/opto

src/share/vm/opto/loopTransform.cpp

Print this page




 379     if (!has_ctrl(nnn))
 380       set_idom(nnn, idom(nnn), dd-1);
 381     // While we're at it, remove any SafePoints from the peeled code
 382     if( old->Opcode() == Op_SafePoint ) {
 383       Node *nnn = old_new[old->_idx];
 384       lazy_replace(nnn,nnn->in(TypeFunc::Control));
 385     }
 386   }
 387 
 388   // Now force out all loop-invariant dominating tests.  The optimizer
 389   // finds some, but we _know_ they are all useless.
 390   peeled_dom_test_elim(loop,old_new);
 391 
 392   loop->record_for_igvn();
 393 }
 394 
 395 //------------------------------policy_maximally_unroll------------------------
 396 // Return TRUE if the loop should be maximally unrolled, FALSE otherwise.
     // Only loops whose init, limit and stride are all constants are considered.
     // Side effect: once the bounds checks below pass, the computed trip count is
     // stored on the CountedLoopNode (set_trip_count), even if FALSE is returned.
 397 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
 398   CountedLoopNode *cl = _head->as_CountedLoop();
 399   assert( cl->is_normal_loop(), "" );
 400 
 401   Node *init_n = cl->init_trip();
 402   Node *limit_n = cl->limit();
 403 
 404   // Non-constant bounds
 405   if( init_n   == NULL || !init_n->is_Con()  ||
 406       limit_n  == NULL || !limit_n->is_Con() ||
 407       // protect against stride not being a constant
 408       !cl->stride_is_con() ) {
 409     return false;
 410   }
 411   int init   = init_n->get_int();
 412   int limit  = limit_n->get_int();
 413   int span   = limit - init; // NOTE(review): plain int subtraction; may wrap for far-apart bounds -- confirm
 414   int stride = cl->stride_con();
 415 
 416   if (init >= limit || stride > span) {
 417     // return a false (no maximally unroll) and the regular unroll/peel
 418     // route will make a small mess which CCP will fold away.
 419     return false;
 420   }
 421   uint trip_count = span/stride;   // trip_count can be greater than 2 Gig.
 422   assert( (int)trip_count*stride == span, "must divide evenly" );
 423 
 424   // Real policy: if we maximally unroll, does it get too big?
 425   // Allow the unrolled mess to get larger than standard loop
 426   // size.  After all, it will no longer be a loop.
 427   uint body_size    = _body.size();
 428   uint unroll_limit = (uint)LoopUnrollLimit * 4;
 429   assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
 430   cl->set_trip_count(trip_count); // recorded before the size checks, so it is set on every path from here on
 431   if( trip_count <= unroll_limit && body_size <= unroll_limit ) {


















 432     uint new_body_size = body_size * trip_count;
 433     if (new_body_size <= unroll_limit &&
 434         body_size == new_body_size / trip_count && // dividing back detects uint wrap-around of the product
 435         // Unrolling can result in a large amount of node construction
 436         new_body_size < MaxNodeLimit - phase->C->unique()) {
 437       return true;    // maximally unroll
 438     }
 439   }
 440 
 441   return false;               // Do not maximally unroll
 442 }
 443 
 444 
 445 //------------------------------policy_unroll----------------------------------
 446 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 447 // the loop is a CountedLoop and the body is small enough.
     // The checks below return FALSE for: a non-constant stride, a trip count
     // of at most 1, unrolling past the profiled trip count, lack of progress
     // with UseSuperWord, over-unrolling of a known iv range, a body that is too
     // big (weighted for long mul/div/mod), and a stride magnitude above 8.
 448 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
 449 
 450   CountedLoopNode *cl = _head->as_CountedLoop();
 451   assert( cl->is_normal_loop() || cl->is_main_loop(), "" );
 452 
 453   // protect against stride not being a constant
 454   if( !cl->stride_is_con() ) return false;
 455 
 456   // protect against over-unrolling
 457   if( cl->trip_count() <= 1 ) return false;
 458 
 459   int future_unroll_ct = cl->unrolled_count() * 2; // unroll count if this round is accepted
 460 
 461   // Don't unroll if the next round of unrolling would push us
 462   // over the expected trip count of the loop.  One is subtracted
 463   // from the expected trip count because the pre-loop normally
 464   // executes 1 iteration.
 465   if (UnrollLimitForProfileCheck > 0 &&
 466       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 467       future_unroll_ct        > UnrollLimitForProfileCheck &&
 468       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 469     return false;
 470   }
 471 
 472   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 473   //   the residual iterations are more than 10% of the trip count
 474   //   and rounds of "unroll,optimize" are not making significant progress
 475   //   Progress defined as current size less than 20% larger than previous size.
 476   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
 477       future_unroll_ct > LoopUnrollMin &&
 478       (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
 479       1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
 480     return false;
 481   }
 482 
 483   Node *init_n = cl->init_trip();
 484   Node *limit_n = cl->limit();
 485   // Non-constant bounds.
 486   // Protect against over-unrolling when init or/and limit are not constant
 487   // (so that trip_count's init value is maxint) but iv range is known.
 488   if( init_n   == NULL || !init_n->is_Con()  ||
 489       limit_n  == NULL || !limit_n->is_Con() ) {
 490     Node* phi = cl->phi();
 491     if( phi != NULL ) {
 492       assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
 493       const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
 494       int next_stride = cl->stride_con() * 2; // stride after this unroll
 495       if( next_stride > 0 ) {
           // Counting up: one doubled step from the low bound must stay within [_lo, _hi].
 496         if( iv_type->_lo + next_stride <= iv_type->_lo || // overflow
 497             iv_type->_lo + next_stride >  iv_type->_hi ) {
 498           return false;  // over-unrolling
 499         }
 500       } else if( next_stride < 0 ) {
           // Counting down: the mirrored test, starting from the high bound.
 501         if( iv_type->_hi + next_stride >= iv_type->_hi || // overflow
 502             iv_type->_hi + next_stride <  iv_type->_lo ) {
 503           return false;  // over-unrolling
 504         }
 505       }
 506     }
 507   }
 508 
 509   // Adjust body_size to determine if we unroll or not
 510   uint body_size = _body.size();
 511   // Key test to unroll CaffeineMark's Logic test
 512   int xors_in_loop = 0;
 513   // Also count ModL, DivL and MulL which expand mightily
 514   for( uint k = 0; k < _body.size(); k++ ) {
 515     switch( _body.at(k)->Opcode() ) {

 516     case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
 517     case Op_ModL: body_size += 30; break;
 518     case Op_DivL: body_size += 30; break;
 519     case Op_MulL: body_size += 10; break;







 520     }

 521   }
 522 
 523   // Check for being too big
 524   if( body_size > (uint)LoopUnrollLimit ) {
         // Exception: a body with at least 4 XorI nodes may be up to 4x the limit.
 525     if( xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 526     // Normal case: loop too big
 527     return false;
 528   }
 529 
 530   // Check for stride being a small enough constant
 531   if( abs(cl->stride_con()) > (1<<3) ) return false; // i.e. |stride| must be at most 8
 532 
 533   // Unroll once!  (Each trip will soon do double iterations)
 534   return true;
 535 }
 536 
 537 //------------------------------policy_align-----------------------------------
 538 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 539 // expression that does the alignment.  Note that only one array base can be
 540 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 541 // if we vectorize short memory ops into longer memory ops, we may want to
 542 // increase alignment.
     // As written here the body unconditionally returns FALSE and does not use
     // 'phase'; no alignment expression is gathered.
 543 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
 544   return false;
 545 }
 546 
 547 //------------------------------policy_range_check-----------------------------
 548 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 549 // Actually we do iteration-splitting, a more powerful form of RCE.
 550 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 551   if( !RangeCheckElimination ) return false;




 379     if (!has_ctrl(nnn))
 380       set_idom(nnn, idom(nnn), dd-1);
 381     // While we're at it, remove any SafePoints from the peeled code
 382     if( old->Opcode() == Op_SafePoint ) {
 383       Node *nnn = old_new[old->_idx];
 384       lazy_replace(nnn,nnn->in(TypeFunc::Control));
 385     }
 386   }
 387 
 388   // Now force out all loop-invariant dominating tests.  The optimizer
 389   // finds some, but we _know_ they are all useless.
 390   peeled_dom_test_elim(loop,old_new);
 391 
 392   loop->record_for_igvn();
 393 }
 394 
 395 //------------------------------policy_maximally_unroll------------------------
 396 // Return TRUE if the loop should be maximally unrolled, FALSE otherwise.
     // Only loops whose init, limit and stride are all constants are considered,
     // and loops containing String intrinsic nodes are always rejected.
     // Side effect: once the bounds checks below pass, the computed trip count is
     // stored on the CountedLoopNode (set_trip_count), even if FALSE is returned.
 397 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
 398   CountedLoopNode *cl = _head->as_CountedLoop();
 399   assert(cl->is_normal_loop(), "");
 400 
 401   Node *init_n = cl->init_trip();
 402   Node *limit_n = cl->limit();
 403 
 404   // Non-constant bounds
 405   if (init_n   == NULL || !init_n->is_Con()  ||
 406       limit_n  == NULL || !limit_n->is_Con() ||
 407       // protect against stride not being a constant
 408       !cl->stride_is_con()) {
 409     return false;
 410   }
 411   int init   = init_n->get_int();
 412   int limit  = limit_n->get_int();
 413   int span   = limit - init; // NOTE(review): plain int subtraction; may wrap for far-apart bounds -- confirm
 414   int stride = cl->stride_con();
 415 
 416   if (init >= limit || stride > span) {
 417     // return a false (no maximally unroll) and the regular unroll/peel
 418     // route will make a small mess which CCP will fold away.
 419     return false;
 420   }
 421   uint trip_count = span/stride;   // trip_count can be greater than 2 Gig.
 422   assert( (int)trip_count*stride == span, "must divide evenly" );
 423 
 424   // Real policy: if we maximally unroll, does it get too big?
 425   // Allow the unrolled mess to get larger than standard loop
 426   // size.  After all, it will no longer be a loop.
 427   uint body_size    = _body.size();
 428   uint unroll_limit = (uint)LoopUnrollLimit * 4;
 429   assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
 430   cl->set_trip_count(trip_count); // recorded before the size checks, so it is set on every path from here on
 431   if (trip_count > unroll_limit || body_size > unroll_limit) {
 432     return false;
 433   }
 434 
 435   // Do not unroll a loop with String intrinsics code.
 436   // String intrinsics are large and have loops.
 437   for (uint k = 0; k < _body.size(); k++) {
 438     Node* n = _body.at(k);
 439     switch (n->Opcode()) {
 440       case Op_StrComp:
 441       case Op_StrEquals:
 442       case Op_StrIndexOf:
 443       case Op_AryEq: {
 444         return false;
 445       }
 446     } // switch
 447   }
 448 
 449   if (body_size <= unroll_limit) { // always true here: the body_size > unroll_limit case returned above
 450     uint new_body_size = body_size * trip_count;
 451     if (new_body_size <= unroll_limit &&
 452         body_size == new_body_size / trip_count && // dividing back detects uint wrap-around of the product
 453         // Unrolling can result in a large amount of node construction
 454         new_body_size < MaxNodeLimit - phase->C->unique()) {
 455       return true;    // maximally unroll
 456     }
 457   }
 458 
 459   return false;               // Do not maximally unroll
 460 }
 461 
 462 
 463 //------------------------------policy_unroll----------------------------------
 464 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 465 // the loop is a CountedLoop and the body is small enough.
     // The checks below return FALSE for: a non-constant stride, a trip count
     // of at most 1, unrolling past the profiled trip count, lack of progress
     // with UseSuperWord, over-unrolling of a known iv range, any String
     // intrinsic node in the body, a body that is too big (weighted for long
     // mul/div/mod), and a stride magnitude above 8.
 466 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
 467 
 468   CountedLoopNode *cl = _head->as_CountedLoop();
 469   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 470 
 471   // protect against stride not being a constant
 472   if (!cl->stride_is_con()) return false;
 473 
 474   // protect against over-unrolling
 475   if (cl->trip_count() <= 1) return false;
 476 
 477   int future_unroll_ct = cl->unrolled_count() * 2; // unroll count if this round is accepted
 478 
 479   // Don't unroll if the next round of unrolling would push us
 480   // over the expected trip count of the loop.  One is subtracted
 481   // from the expected trip count because the pre-loop normally
 482   // executes 1 iteration.
 483   if (UnrollLimitForProfileCheck > 0 &&
 484       cl->profile_trip_cnt() != COUNT_UNKNOWN &&
 485       future_unroll_ct        > UnrollLimitForProfileCheck &&
 486       (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
 487     return false;
 488   }
 489 
 490   // When unroll count is greater than LoopUnrollMin, don't unroll if:
 491   //   the residual iterations are more than 10% of the trip count
 492   //   and rounds of "unroll,optimize" are not making significant progress
 493   //   Progress defined as current size less than 20% larger than previous size.
 494   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
 495       future_unroll_ct > LoopUnrollMin &&
 496       (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
 497       1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
 498     return false;
 499   }
 500 
 501   Node *init_n = cl->init_trip();
 502   Node *limit_n = cl->limit();
 503   // Non-constant bounds.
 504   // Protect against over-unrolling when init or/and limit are not constant
 505   // (so that trip_count's init value is maxint) but iv range is known.
 506   if (init_n   == NULL || !init_n->is_Con()  ||
 507       limit_n  == NULL || !limit_n->is_Con()) {
 508     Node* phi = cl->phi();
 509     if (phi != NULL) {
 510       assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
 511       const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
 512       int next_stride = cl->stride_con() * 2; // stride after this unroll
 513       if (next_stride > 0) {
           // Counting up: one doubled step from the low bound must stay within [_lo, _hi].
 514         if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
 515             iv_type->_lo + next_stride >  iv_type->_hi) {
 516           return false;  // over-unrolling
 517         }
 518       } else if (next_stride < 0) {
           // Counting down: the mirrored test, starting from the high bound.
 519         if (iv_type->_hi + next_stride >= iv_type->_hi || // overflow
 520             iv_type->_hi + next_stride <  iv_type->_lo) {
 521           return false;  // over-unrolling
 522         }
 523       }
 524     }
 525   }
 526 
 527   // Adjust body_size to determine if we unroll or not
 528   uint body_size = _body.size();
 529   // Key test to unroll CaffeineMark's Logic test
 530   int xors_in_loop = 0;
 531   // Also count ModL, DivL and MulL which expand mightily
 532   for (uint k = 0; k < _body.size(); k++) {
 533     Node* n = _body.at(k);
 534     switch (n->Opcode()) {
 535       case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
 536       case Op_ModL: body_size += 30; break;
 537       case Op_DivL: body_size += 30; break;
 538       case Op_MulL: body_size += 10; break;
 539       case Op_StrComp:
 540       case Op_StrEquals:
 541       case Op_StrIndexOf:
 542       case Op_AryEq: {
 543         // Do not unroll a loop with String intrinsics code.
 544         // String intrinsics are large and have loops.
 545         return false;
 546       }
 547     } // switch
 548   }
 549 
 550   // Check for being too big
 551   if (body_size > (uint)LoopUnrollLimit) {
         // Exception: a body with at least 4 XorI nodes may be up to 4x the limit.
 552     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
 553     // Normal case: loop too big
 554     return false;
 555   }
 556 
 557   // Check for stride being a small enough constant
 558   if (abs(cl->stride_con()) > (1<<3)) return false; // i.e. |stride| must be at most 8
 559 
 560   // Unroll once!  (Each trip will soon do double iterations)
 561   return true;
 562 }
 563 
 564 //------------------------------policy_align-----------------------------------
 565 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 566 // expression that does the alignment.  Note that only one array base can be
 567 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 568 // if we vectorize short memory ops into longer memory ops, we may want to
 569 // increase alignment.
     // As written here the body unconditionally returns FALSE and does not use
     // 'phase'; no alignment expression is gathered.
 570 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
 571   return false;
 572 }
 573 
 574 //------------------------------policy_range_check-----------------------------
 575 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 576 // Actually we do iteration-splitting, a more powerful form of RCE.
 577 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
 578   if( !RangeCheckElimination ) return false;


src/share/vm/opto/loopTransform.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File