--- old/src/share/vm/opto/c2_globals.hpp 2015-10-05 17:22:14.454465000 -0700 +++ new/src/share/vm/opto/c2_globals.hpp 2015-10-05 17:22:14.321938500 -0700 @@ -326,6 +326,9 @@ product(bool, SuperWordReductions, true, \ "Enable reductions support in superword.") \ \ + product(bool, DoReserveCopyInSuperWord, true, \ + "Create reserve copy of graph in SuperWord.") \ + \ notproduct(bool, TraceSuperWord, false, \ "Trace superword transforms") \ \ --- old/src/share/vm/opto/loopUnswitch.cpp 2015-10-05 17:22:15.835741200 -0700 +++ new/src/share/vm/opto/loopUnswitch.cpp 2015-10-05 17:22:15.683710800 -0700 @@ -263,3 +263,136 @@ return iffast; } + +LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk) { + Node_List old_new; + LoopNode* head = loop->_head->as_Loop(); + bool counted_loop = head->is_CountedLoop(); + Node* entry = head->in(LoopNode::EntryControl); + _igvn.rehash_node_delayed(entry); + IdealLoopTree* outer_loop = loop->_parent; + + ConINode* const_1 = _igvn.intcon(1); + set_ctrl(const_1, C->root()); + IfNode* iff = new IfNode(entry, const_1, PROB_MAX, COUNT_UNKNOWN); + register_node(iff, outer_loop, entry, dom_depth(entry)); + ProjNode* iffast = new IfTrueNode(iff); + register_node(iffast, outer_loop, iff, dom_depth(iff)); + ProjNode* ifslow = new IfFalseNode(iff); + register_node(ifslow, outer_loop, iff, dom_depth(iff)); + + // Clone the loop body. The clone becomes the slow (reserved) loop. The + // original pre-header will (illegally) have 3 control users + // (old & new loops & new if). 
+ clone_loop(loop, old_new, dom_depth(head), iff); + assert(old_new[head->_idx]->is_Loop(), "" ); + + LoopNode* slow_head = old_new[head->_idx]->as_Loop(); + +#ifndef PRODUCT + if (TraceLoopOpts) { + tty->print_cr("PhaseIdealLoop::create_reserve_version_of_loop:"); + tty->print("\t iff = %d, ", iff->_idx); iff->dump(); + tty->print("\t iffast = %d, ", iffast->_idx); iffast->dump(); + tty->print("\t ifslow = %d, ", ifslow->_idx); ifslow->dump(); + tty->print("\t before replace_input_of: head = %d, ", head->_idx); head->dump(); + tty->print("\t before replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump(); + } +#endif + + // Fast (true) control: the original loop head is entered through the IfTrue projection + _igvn.replace_input_of(head, LoopNode::EntryControl, iffast); + // Slow (false) control: the cloned (reserved) loop head is entered through the IfFalse projection + _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow); + + recompute_dom_depth(); + + lk->set_iff(iff); + +#ifndef PRODUCT + if (TraceLoopOpts ) { + tty->print("\t after replace_input_of: head = %d, ", head->_idx); head->dump(); + tty->print("\t after replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump(); + } +#endif + + return slow_head->as_Loop(); +} + +CountedLoopReserveKit::CountedLoopReserveKit(PhaseIdealLoop* phase, IdealLoopTree *loop, bool active = true) : + _phase(phase), + _lpt(loop), + _lp(NULL), + _iff(NULL), + _lp_reserved(NULL), + _has_reserved(false), + _use_new(false), + _active(active) + { + create_reserve(); + }; + +CountedLoopReserveKit::~CountedLoopReserveKit() { + if (!_active) { + return; + } + + if (_has_reserved && !_use_new) { + // Feeding intcon(0) into input 1 of the If (in place of intcon(1)) reverts control flow to the reserved copy + ConINode* const_0 = _phase->_igvn.intcon(0); + _phase->set_ctrl(const_0, _phase->C->root()); + _iff->set_req(1, const_0); + + #ifndef PRODUCT + if (TraceLoopOpts) { + tty->print_cr("CountedLoopReserveKit::~CountedLoopReserveKit()"); + tty->print("\t discard loop %d and revert to the reserved loop clone %d: ", _lp->_idx, _lp_reserved->_idx); + _lp_reserved->dump(); + } + #endif + } +} 
+ +bool CountedLoopReserveKit::create_reserve() { + if (!_active) { + return false; + } + + if(!_lpt->_head->is_CountedLoop()) { + NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);}) + return false; + } + CountedLoopNode *cl = _lpt->_head->as_CountedLoop(); + if (!cl->is_valid_counted_loop()) { + NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);}) + return false; // skip malformed counted loop + } + if (!cl->is_main_loop()) { + NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);}) + return false; // only main loops are reserved: skip normal, pre, and post loops + } + + _lp = _lpt->_head->as_Loop(); + _lp_reserved = _phase->create_reserve_version_of_loop(_lpt, this); // also records the new If in _iff via set_iff() + + if (!_lp_reserved->is_CountedLoop()) { + return false; // the clone head is expected to be a counted loop as well + } + + Node* ifslow_pred = _lp_reserved->as_CountedLoop()->in(LoopNode::EntryControl); + + if (!ifslow_pred->is_IfFalse()) { + return false; // the reserved loop must be entered through an IfFalse projection + } + + Node* iff = ifslow_pred->in(0); + if (!iff->is_If() || iff != _iff) { + return false; // that projection must belong to the If recorded by set_iff() + } + + if (iff->in(1)->Opcode() != Op_ConI) { + return false; // input 1 of the If must be an integer constant + } + + return _has_reserved = true; // all shape checks passed +} --- old/src/share/vm/opto/loopnode.hpp 2015-10-05 17:22:17.203014600 -0700 +++ new/src/share/vm/opto/loopnode.hpp 2015-10-05 17:22:17.072488500 -0700 @@ -38,6 +38,7 @@ class LoopNode; class Node; class PhaseIdealLoop; +class CountedLoopReserveKit; class VectorSet; class Invariance; struct small_cache; @@ -529,6 +530,8 @@ class PhaseIdealLoop : public PhaseTransform { friend class IdealLoopTree; friend class SuperWord; + friend class CountedLoopReserveKit; + // Pre-computed def-use info PhaseIterGVN &_igvn; @@ -965,6 +968,16 @@ ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new); + // Clone a loop and return the clone head (clone_loop_head). 
+ // Added nodes include int(1), If, IfTrue, IfFalse (int(0) is only created later, by ~CountedLoopReserveKit). + // This routine was created for usage in CountedLoopReserveKit. + // + // int(1) -> If -> IfTrue -> original_loop_head + // | + // V + // IfFalse -> clone_loop_head (returned by function pointer) + // + LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk); // Clone loop with an invariant test (that does not exit) and // insert a clone of the test that selects which version to // execute. @@ -1117,6 +1130,68 @@ #endif }; +// This kit may be used for making a reserved copy of a loop before this loop +// undergoes irreversible changes. +// +// Function create_reserve() creates a reserved copy (clone) of the loop. +// The reserved copy is created by calling +// PhaseIdealLoop::create_reserve_version_of_loop - see there how +// the original and reserved loops are connected in the outer graph. +// If create_reserve succeeded, it returns 'true' and _has_reserved is set to 'true'. +// +// By default the reserved copy (clone) of the loop is created as dead code - it is +// dominated in the outer loop by this node chain: +// intcon(1)->If->IfFalse->reserved_copy. +// The original loop is dominated by the same node chain but IfTrue projection: +// intcon(1)->If->IfTrue->original_loop. +// +// In this implementation of CountedLoopReserveKit the ctor includes create_reserve() +// and the dtor checks the _use_new value. +// If _use_new == false, it "switches" control to the reserved copy of the loop +// by simply replacing node intcon(1) with node intcon(0). +// +// Here is a proposed example of usage (see also SuperWord::output in superword.cpp). +// +// void CountedLoopReserveKit_example() +// { +// CountedLoopReserveKit lrk(phase, lpt, DoReserveCopy = true); // create local object +// if (DoReserveCopy && !lrk.has_reserved()) { +// return; //failed to create reserved loop copy +// } +// ... 
+// //something is wrong, switch to the reserved (unmodified) copy of the loop +// if(something_is_wrong) return; // ~CountedLoopReserveKit makes the switch +// ... +// //everything worked ok, return with the newly modified loop +// lrk.use_new(); +// return; // ~CountedLoopReserveKit does nothing once use_new() was called +// } +// +// Keep in mind that, by default, if create_reserve() is not followed by use_new() +// the dtor will switch to the reserved copy of the original loop. +// NOTE. If you modify nodes outside of the original loop, this class is no help. +// +class CountedLoopReserveKit { + private: + PhaseIdealLoop* _phase; + IdealLoopTree* _lpt; + LoopNode* _lp; // head of the loop that is going to be modified + IfNode* _iff; // If created by create_reserve_version_of_loop (see set_iff) + LoopNode* _lp_reserved; // head of the reserved clone + bool _has_reserved; // true once create_reserve() succeeded + bool _use_new; // set by use_new(); when false the dtor reverts to the reserved clone + const bool _active; // may be set to false in ctor; then the object is a dummy and ctor/dtor do nothing + + public: + CountedLoopReserveKit(PhaseIdealLoop* phase, IdealLoopTree *loop, bool active); + ~CountedLoopReserveKit(); + void use_new() {_use_new = true;} + void set_iff(IfNode* x) {_iff = x;} + bool has_reserved() const { return _active && _has_reserved;} + private: + bool create_reserve(); +};// class CountedLoopReserveKit + inline Node* IdealLoopTree::tail() { // Handle lazy update of _tail field Node *n = _tail; --- old/src/share/vm/opto/superword.cpp 2015-10-05 17:22:18.596793300 -0700 +++ new/src/share/vm/opto/superword.cpp 2015-10-05 17:22:18.444262800 -0700 @@ -81,6 +81,10 @@ if (_phase->C->method() != NULL) { _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug); } + _CountedLoopReserveKit_debug = 0; + if (_phase->C->method() != NULL) { + _phase->C->method()->has_option_value("DoReserveCopyInSuperWordDebug", _CountedLoopReserveKit_debug); + } #endif } @@ -1763,6 +1767,22 @@ } } +#ifndef PRODUCT +void SuperWord::print_loop(bool whole) { + Node_Stack stack(_arena, _phase->C->unique() >> 2); + Node_List rpo_list; + VectorSet visited(_arena); + visited.set(lpt()->_head->_idx); + _phase->rpo(lpt()->_head, stack, visited, rpo_list); + _phase->dump(lpt(), 
rpo_list.size(), rpo_list ); + if(whole) { + tty->print_cr("\n Whole loop tree"); + _phase->dump(); + tty->print_cr(" End of whole loop tree\n"); + } +} +#endif + //------------------------------output--------------------------- // Convert packs into vector node operations void SuperWord::output() { @@ -1770,7 +1790,7 @@ #ifndef PRODUCT if (TraceLoopOpts) { - tty->print("SuperWord "); + tty->print("SuperWord::output "); lpt()->dump_head(); } #endif @@ -1789,6 +1809,18 @@ CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); uint max_vlen_in_bytes = 0; uint max_vlen = 0; + + NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SuperWord::output: print loop before create_reserve_version_of_loop"); print_loop(true);}) + + CountedLoopReserveKit make_reversable(_phase, _lpt, DoReserveCopyInSuperWord); // on scope exit the dtor reverts to the reserved copy unless use_new() was called + + NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("SuperWord::output: print loop after create_reserve_version_of_loop"); print_loop(true);}) + + if (DoReserveCopyInSuperWord && !make_reversable.has_reserved()) { + NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0 || TraceLoopOpts) {tty->print_cr("SuperWord::output: loop was not reserved correctly, exiting SuperWord");}) + return; // no reserved copy to fall back on, so leave the loop untouched + } + for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); Node_List* p = my_pack(n); @@ -1888,6 +1920,7 @@ } } C->set_max_vector_size(max_vlen_in_bytes); + if (SuperWordLoopUnrollAnalysis) { if (cl->has_passed_slp()) { uint slp_max_unroll_factor = cl->slp_max_unroll(); @@ -1900,6 +1933,12 @@ } } } + + if (DoReserveCopyInSuperWord) { + make_reversable.use_new(); // keep the transformed loop; the dtor will not switch to the reserved copy + } + NOT_PRODUCT(if(_CountedLoopReserveKit_debug > 0) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);}) + return; } //------------------------------vector_opd--------------------------- --- old/src/share/vm/opto/superword.hpp 2015-10-05 17:22:20.122598400 -0700 +++ new/src/share/vm/opto/superword.hpp 2015-10-05 17:22:19.959565800 -0700 @@ -274,6 +274,7 @@ GrowableArray _ii_order; #ifndef PRODUCT uintx _vector_loop_debug; // 
provide more printing in debug mode + uintx _CountedLoopReserveKit_debug; // nonzero enables loop dumps around CountedLoopReserveKit use in SuperWord::output #endif // Accessors @@ -350,6 +351,7 @@ // Tracing support #ifndef PRODUCT void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); + void print_loop(bool whole); // dump this loop; if 'whole' is true also dump the whole loop tree #endif // Find a memory reference to align the loop induction variable to. MemNode* find_align_to_ref(Node_List &memops);