--- old/src/share/vm/opto/loopTransform.cpp 2015-06-25 14:11:29.213584100 -0700 +++ new/src/share/vm/opto/loopTransform.cpp 2015-06-25 14:11:29.013564100 -0700 @@ -280,6 +280,10 @@ || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit() ) { return false; // too large to safely clone } + + // check for vectorized loops, any peeling done was already applied + if (_head->is_CountedLoop() && _head->as_CountedLoop()->ignore_slp()) return false; + while( test != _head ) { // Scan till run off top of loop if( test->is_If() ) { // Test? Node *ctrl = phase->get_ctrl(test->in(1)); @@ -656,7 +660,12 @@ _local_loop_unroll_limit = LoopUnrollLimit; _local_loop_unroll_factor = 4; int future_unroll_ct = cl->unrolled_count() * 2; - if (future_unroll_ct > LoopMaxUnroll) return false; + if (!cl->ignore_slp()) { + if (future_unroll_ct > LoopMaxUnroll) return false; + } else { + // obey user constraints on vector mapped loops with additional unrolling applied + if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false; + } // Check for initial stride being a small enough constant if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false; @@ -759,13 +768,19 @@ if (LoopMaxUnroll > _local_loop_unroll_factor) { // Once policy_slp_analysis succeeds, mark the loop with the // maximal unroll factor so that we minimize analysis passes - if ((future_unroll_ct > _local_loop_unroll_factor) || - (body_size > (uint)_local_loop_unroll_limit)) { + if (future_unroll_ct >= _local_loop_unroll_factor) { policy_unroll_slp_analysis(cl, phase, future_unroll_ct); } } } + int slp_max_unroll_factor = cl->slp_max_unroll(); + if (cl->has_passed_slp()) { + if (slp_max_unroll_factor >= future_unroll_ct) return true; + // Normal case: loop too big + return false; + } + // Check for being too big if (body_size > (uint)_local_loop_unroll_limit) { if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; @@ -773,6 +788,10 @@ return false; } + 
if(cl->ignore_slp()) { + NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct)); + } + // Unroll once! (Each trip will soon do double iterations) return true; } @@ -780,28 +799,24 @@ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) { // Enable this functionality target by target as needed if (SuperWordLoopUnrollAnalysis) { - if (!cl->has_passed_slp()) { + if (!cl->was_slp_analyzed()) { SuperWord sw(phase); sw.transform_loop(this, false); // If the loop is slp canonical analyze it if (sw.early_return() == false) { - sw.unrolling_analysis(cl, _local_loop_unroll_factor); + sw.unrolling_analysis(_local_loop_unroll_factor); } } - int slp_max_unroll_factor = cl->slp_max_unroll(); - if ((slp_max_unroll_factor > 4) && - (slp_max_unroll_factor >= future_unroll_ct)) { - int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; - if (new_limit > LoopUnrollLimit) { -#ifndef PRODUCT - if (TraceSuperWordLoopUnrollAnalysis) { - tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n", - new_limit, _local_loop_unroll_limit); + if (cl->has_passed_slp()) { + int slp_max_unroll_factor = cl->slp_max_unroll(); + if (slp_max_unroll_factor >= future_unroll_ct) { + int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; + if (new_limit > LoopUnrollLimit) { + NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit)); + _local_loop_unroll_limit = new_limit; } -#endif - _local_loop_unroll_limit = new_limit; } } } @@ -830,6 +845,9 @@ if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. 
Node *trip_counter = cl->phi(); + // check for vectorized loops, some opts are no longer needed + if (cl->ignore_slp()) return false; + // Check loop body for tests of trip-counter plus loop-invariant vs // loop-invariant. for (uint i = 0; i < _body.size(); i++) { @@ -880,6 +898,8 @@ // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful // for unrolling loops with NO array accesses. bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const { + // check for vectorized loops, any peeling done was already applied + if (_head->is_CountedLoop() && _head->as_CountedLoop()->ignore_slp()) return false; for( uint i = 0; i < _body.size(); i++ ) if( _body[i]->is_Mem() ) --- old/src/share/vm/opto/loopUnswitch.cpp 2015-06-25 14:11:30.643727100 -0700 +++ new/src/share/vm/opto/loopUnswitch.cpp 2015-06-25 14:11:30.433706100 -0700 @@ -61,6 +61,12 @@ if (!_head->is_Loop()) { return false; } + + // check for vectorized loops, any unswitching was already applied + if (_head->is_CountedLoop() && _head->as_CountedLoop()->ignore_slp()) { + return false; + } + int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes(); if ((int)(2 * _body.size()) > nodes_left) { return false; // Too speculative if running low on nodes. 
--- old/src/share/vm/opto/loopnode.cpp 2015-06-25 14:11:31.918854600 -0700 +++ new/src/share/vm/opto/loopnode.cpp 2015-06-25 14:11:31.718834600 -0700 @@ -2317,7 +2317,11 @@ // Reassociate invariants and prep for split_thru_phi for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { IdealLoopTree* lpt = iter.current(); - if (!lpt->is_counted() || !lpt->is_inner()) continue; + bool is_counted = lpt->is_counted(); + if (!is_counted || !lpt->is_inner()) continue; + + // check for vectorized loops, any reassociation of invariants was already done + if (is_counted && lpt->_head->as_CountedLoop()->ignore_slp()) continue; lpt->reassociate_invariants(this); --- old/src/share/vm/opto/loopnode.hpp 2015-06-25 14:11:33.299992700 -0700 +++ new/src/share/vm/opto/loopnode.hpp 2015-06-25 14:11:33.103973100 -0700 @@ -64,7 +64,9 @@ PartialPeelLoop=32, PartialPeelFailed=64, HasReductions=128, - PassedSlpAnalysis=256 }; + WasSlpAnalyzed=256, + PassedSlpAnalysis=512, + NoMoreSlp=1024 }; char _unswitch_count; enum { _unswitch_max=3 }; @@ -80,7 +82,9 @@ int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } void mark_has_reductions() { _loop_flags |= HasReductions; } + void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; } void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; } + void mark_no_slp() { _loop_flags |= NoMoreSlp; } int unswitch_max() { return _unswitch_max; } int unswitch_count() { return _unswitch_count; } @@ -212,7 +216,9 @@ int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; } + int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; } int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; } + int 
ignore_slp () const { return (_loop_flags&NoMoreSlp) == NoMoreSlp; } int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } @@ -235,6 +241,9 @@ void set_nonexact_trip_count() { _loop_flags &= ~HasExactTripCount; } + void set_notpassed_slp() { + _loop_flags &= ~PassedSlpAnalysis; + } void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } float profile_trip_cnt() { return _profile_trip_cnt; } --- old/src/share/vm/opto/superword.cpp 2015-06-25 14:11:34.655128200 -0700 +++ new/src/share/vm/opto/superword.cpp 2015-06-25 14:11:34.456108300 -0700 @@ -100,6 +100,10 @@ return; } + // We only re-enter slp when we vector mapped a queried loop and we want to + // continue unrolling, in this case, slp is not subsequently done. + if (cl->ignore_slp()) return; + // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) CountedLoopEndNode* pre_end = get_pre_loop_end(cl); if (pre_end == NULL) return; @@ -121,12 +125,13 @@ } //------------------------------early unrolling analysis------------------------------ -void SuperWord::unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_factor) { +void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { bool is_slp = true; ResourceMark rm; size_t ignored_size = lpt()->_body.size(); int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size); Node_Stack nstack((int)ignored_size); + CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); Node *cl_exit = cl->loopexit(); // First clear the entries @@ -241,13 +246,9 @@ // If a max vector exists which is not larger than _local_loop_unroll_factor // stop looking, we already have the max vector to map to. 
- if (cur_max_vector <= local_loop_unroll_factor) { + if (cur_max_vector < local_loop_unroll_factor) { is_slp = false; -#ifndef PRODUCT - if (TraceSuperWordLoopUnrollAnalysis) { - tty->print_cr("slp analysis fails: unroll limit equals max vector\n"); - } -#endif + NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis fails: unroll limit greater than max vector\n")); break; } @@ -260,8 +261,9 @@ } if (is_slp) { local_loop_unroll_factor = max_vector; + cl->mark_passed_slp(); } - cl->mark_passed_slp(); + cl->mark_was_slp(); cl->set_slp_max_unroll(local_loop_unroll_factor); } } @@ -1750,7 +1752,9 @@ } Compile* C = _phase->C; + CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); uint max_vlen_in_bytes = 0; + uint max_vlen = 0; for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); Node_List* p = my_pack(n); @@ -1833,6 +1837,7 @@ _igvn._worklist.push(vn); if (vlen_in_bytes > max_vlen_in_bytes) { + max_vlen = vlen; max_vlen_in_bytes = vlen_in_bytes; } #ifdef ASSERT @@ -1844,6 +1849,18 @@ } } C->set_max_vector_size(max_vlen_in_bytes); + if (SuperWordLoopUnrollAnalysis) { + if (cl->has_passed_slp()) { + uint slp_max_unroll_factor = cl->slp_max_unroll(); + if (slp_max_unroll_factor == max_vlen) { + NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); + // For atomic unrolled loops which are vector mapped, instigate more unrolling. 
+ cl->set_notpassed_slp(); + C->set_major_progress(); + cl->mark_no_slp(); + } + } + } } //------------------------------vector_opd--------------------------- --- old/src/share/vm/opto/superword.hpp 2015-06-25 14:11:36.145277200 -0700 +++ new/src/share/vm/opto/superword.hpp 2015-06-25 14:11:35.939256600 -0700 @@ -241,7 +241,7 @@ void transform_loop(IdealLoopTree* lpt, bool do_optimization); - void unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_factor); + void unrolling_analysis(int &local_loop_unroll_factor); // Accessors for SWPointer PhaseIdealLoop* phase() { return _phase; }