495 // Check for ops pinned in an arm of the diamond. 496 // Can't remove the control flow in this case 497 if (lp->outcnt() > 1) return NULL; 498 if (rp->outcnt() > 1) return NULL; 499 500 IdealLoopTree* r_loop = get_loop(region); 501 assert(r_loop == get_loop(iff), "sanity"); 502 // Always convert to CMOVE if all results are used only outside this loop. 503 bool used_inside_loop = (r_loop == _ltree_root); 504 505 // Check profitability 506 int cost = 0; 507 int phis = 0; 508 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 509 Node *out = region->fast_out(i); 510 if (!out->is_Phi()) continue; // Ignore other control edges, etc 511 phis++; 512 PhiNode* phi = out->as_Phi(); 513 BasicType bt = phi->type()->basic_type(); 514 switch (bt) { 515 case T_FLOAT: 516 case T_DOUBLE: { 517 cost += Matcher::float_cmove_cost(); // Could be very expensive 518 break; 519 } 520 case T_LONG: { 521 cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's 522 } 523 case T_INT: // These all CMOV fine 524 case T_ADDRESS: { // (RawPtr) 525 cost++; 526 break; 527 } 528 case T_NARROWOOP: // Fall through 529 case T_OBJECT: { // Base oops are OK, but not derived oops 530 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); 531 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a 532 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus 533 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we 534 // have a Phi for the base here that we convert to a CMOVE all is well 535 // and good. But if the base is dead, we'll not make a CMOVE. Later 536 // the allocator will have to produce a base by creating a CMOVE of the 556 for (uint k = 1; k < inp->req(); k++) 557 if (get_ctrl(inp->in(k)) == proj) 558 cost += ConditionalMoveLimit; // Too much speculative goo 559 } 560 } 561 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. 562 // This will likely Split-If, a higher-payoff operation. 563 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { 564 Node* use = phi->fast_out(k); 565 if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr()) 566 cost += ConditionalMoveLimit; 567 // Is there a use inside the loop? 568 // Note: check only basic types since CMoveP is pinned. 569 if (!used_inside_loop && is_java_primitive(bt)) { 570 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use); 571 if (r_loop == u_loop || r_loop->is_member(u_loop)) { 572 used_inside_loop = true; 573 } 574 } 575 } 576 } 577 Node* bol = iff->in(1); 578 assert(bol->Opcode() == Op_Bool, ""); 579 int cmp_op = bol->in(1)->Opcode(); 580 // It is expensive to generate flags from a float compare. 581 // Avoid duplicated float compare. 582 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; 583 584 float infrequent_prob = PROB_UNLIKELY_MAG(3); 585 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop. 586 if (used_inside_loop) { 587 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo 588 589 // BlockLayoutByFrequency optimization moves infrequent branch 590 // from hot path. No point in CMOV'ing in such case (110 is used 591 // instead of 100 to take into account not exactness of float value). 592 if (BlockLayoutByFrequency) { 593 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f); 594 } 595 } 596 // Check for highly predictable branch. No point in CMOV'ing if 597 // we are going to predict accurately all the time. 598 if (iff->_prob < infrequent_prob || 599 iff->_prob > (1.0f - infrequent_prob)) 600 return NULL; 601 602 // -------------- 603 // Now replace all Phis with CMOV's 604 Node *cmov_ctrl = iff->in(0); 605 uint flip = (lp->Opcode() == Op_IfTrue); 606 while (1) { 607 PhiNode* phi = NULL; 608 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 609 Node *out = region->fast_out(i); 610 if (out->is_Phi()) { 611 phi = out->as_Phi(); 612 break; 613 } 614 } 615 if (phi == NULL) break; 616 #ifndef PRODUCT 617 if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); 618 #endif | 495 // Check for ops pinned in an arm of the diamond. 496 // Can't remove the control flow in this case 497 if (lp->outcnt() > 1) return NULL; 498 if (rp->outcnt() > 1) return NULL; 499 500 IdealLoopTree* r_loop = get_loop(region); 501 assert(r_loop == get_loop(iff), "sanity"); 502 // Always convert to CMOVE if all results are used only outside this loop. 503 bool used_inside_loop = (r_loop == _ltree_root); 504 505 // Check profitability 506 int cost = 0; 507 int phis = 0; 508 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 509 Node *out = region->fast_out(i); 510 if (!out->is_Phi()) continue; // Ignore other control edges, etc 511 phis++; 512 PhiNode* phi = out->as_Phi(); 513 BasicType bt = phi->type()->basic_type(); 514 switch (bt) { 515 case T_DOUBLE: 516 if (C->use_cmove()) { 517 continue; //TODO: maybe we want to add some cost 518 } 519 case T_FLOAT: { 520 cost += Matcher::float_cmove_cost(); // Could be very expensive 521 break; 522 } 523 case T_LONG: { 524 cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's 525 } 526 case T_INT: // These all CMOV fine 527 case T_ADDRESS: { // (RawPtr) 528 cost++; 529 break; 530 } 531 case T_NARROWOOP: // Fall through 532 case T_OBJECT: { // Base oops are OK, but not derived oops 533 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); 534 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a 535 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus 536 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we 537 // have a Phi for the base here that we convert to a CMOVE all is well 538 // and good. But if the base is dead, we'll not make a CMOVE. Later 539 // the allocator will have to produce a base by creating a CMOVE of the 559 for (uint k = 1; k < inp->req(); k++) 560 if (get_ctrl(inp->in(k)) == proj) 561 cost += ConditionalMoveLimit; // Too much speculative goo 562 } 563 } 564 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. 565 // This will likely Split-If, a higher-payoff operation. 566 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { 567 Node* use = phi->fast_out(k); 568 if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr()) 569 cost += ConditionalMoveLimit; 570 // Is there a use inside the loop? 571 // Note: check only basic types since CMoveP is pinned. 572 if (!used_inside_loop && is_java_primitive(bt)) { 573 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use); 574 if (r_loop == u_loop || r_loop->is_member(u_loop)) { 575 used_inside_loop = true; 576 } 577 } 578 } 579 }//for 580 Node* bol = iff->in(1); 581 assert(bol->Opcode() == Op_Bool, ""); 582 int cmp_op = bol->in(1)->Opcode(); 583 // It is expensive to generate flags from a float compare. 584 // Avoid duplicated float compare. 585 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; 586 587 float infrequent_prob = PROB_UNLIKELY_MAG(3); 588 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop. 589 if (used_inside_loop) { 590 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo 591 592 // BlockLayoutByFrequency optimization moves infrequent branch 593 // from hot path. No point in CMOV'ing in such case (110 is used 594 // instead of 100 to take into account not exactness of float value). 595 if (BlockLayoutByFrequency) { 596 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f); 597 } 598 } 599 // Check for highly predictable branch. No point in CMOV'ing if 600 // we are going to predict accurately all the time. 601 if (C->use_cmove() && cmp_op == Op_CmpD) ;//keep going 602 else if (iff->_prob < infrequent_prob || 603 iff->_prob > (1.0f - infrequent_prob)) 604 return NULL; 605 606 // -------------- 607 // Now replace all Phis with CMOV's 608 Node *cmov_ctrl = iff->in(0); 609 uint flip = (lp->Opcode() == Op_IfTrue); 610 while (1) { 611 PhiNode* phi = NULL; 612 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 613 Node *out = region->fast_out(i); 614 if (out->is_Phi()) { 615 phi = out->as_Phi(); 616 break; 617 } 618 } 619 if (phi == NULL) break; 620 #ifndef PRODUCT 621 if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); 622 #endif |