< prev index next >

src/share/vm/opto/loopnode.hpp

Print this page
rev 8885 : SIMD: starting support for safe loop modification in SW - added PhaseIdealLoop::create_reserve_version_of_loop.
Ugly, but works OK (no support yet for loop reverse)
rev 8886 : SIMD: added BoolNode bol_ne - now BoolNode bol_eq may be subsumed by bol_ne and therefore loop will be switched to the reserved copy.
rev 8887 : SIMD: class LoopReserveKit created. Both directions are tested, in this checkin
switch_to_reserved() is called in SuperWord::output just for testing reverse to the original loop.
rev 8888 : SIMD: big comment how to use LoopReserveKit
rev 8926 : Merge
rev 8927 : SIMD: added
        assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
        if (do_reserve_copy() && lk._has_reserved && !bol->is_Bool()) {
          goto output_error;
        }
rev 8930 : SIMD: cleanup - trailing spaces, tabs
rev 8931 : SIMD: version of LoopReserveKit that does not require goto:
it's dtor is making switching to the modified copy of the loop (switch_to_reserved),
if use_new() has been ever called, otherwise dtor does nothing and then
by default the graph remains in it's cloned copy.
NOTE, that superword.cpp:2106 includes goto output_error which unconditionally
(for testing purpose) switches control to the reserved copy.
NOTE: code is dirty, need cleaning. But works.
NOTE: it still has modification of BoolNode. Way to replace it with ConINode.
rev 8933 : SIMD: added LoopReserveKit::_active, cleanup
rev 8935 : SIMD: now PhaseIdealLoop::create_reserve_version_of_loop builds graph
intcon(1)->If, skipping creation of Cmp and Bool.
NOTE: LoopReserveKit::create_reserve need more sanity checks
NOTE: code needs cleanup
rev 8937 : SIMD: added option DoReserveCopyInSuperWordTest for testing switching to reversed copy.
Much better functionality description of LoopReserveKit in loopnode.hpp
Cleanup in loopUnswitch.cpp
rev 8938 : SIMD: some functions are renamed, some cleanup
rev 9034 : SIMD: merged with class CountedLoopReserveKit.
rev 9035 : SIMD: trimming tailing spaces
rev 9036 : SIMD: comment correction
rev 9039 : Merge
rev 9093 : Merge
rev 9094 : SIMD: fixing kvn comments to Re: RFR(M): 8136725 Provide utility for creation a counted loop reserve copy (clone)
rev 9095 : SIMD: fixing kvn comments (2) to Re: RFR(M): 8136725 Provide utility for creation a counted loop reserve copy (clone)
rev 9101 : Merge


  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
  26 #define SHARE_VM_OPTO_LOOPNODE_HPP
  27 
  28 #include "opto/cfgnode.hpp"
  29 #include "opto/multnode.hpp"
  30 #include "opto/phaseX.hpp"
  31 #include "opto/subnode.hpp"
  32 #include "opto/type.hpp"
  33 
  34 class CmpNode;
  35 class CountedLoopEndNode;
  36 class CountedLoopNode;
  37 class IdealLoopTree;
  38 class LoopNode;
  39 class Node;
  40 class PhaseIdealLoop;

  41 class VectorSet;
  42 class Invariance;
  43 struct small_cache;
  44 
  45 //
  46 //                  I D E A L I Z E D   L O O P S
  47 //
  48 // Idealized loops are the set of loops I perform more interesting
  49 // transformations on, beyond simple hoisting.
  50 
  51 //------------------------------LoopNode---------------------------------------
  52 // Simple loop header.  Fall in path on left, loop-back path on right.
  53 class LoopNode : public RegionNode {
  54   // Size is bigger to hold the flags.  However, the flags do not change
  55   // the semantics so it does not appear in the hash & cmp functions.
  56   virtual uint size_of() const { return sizeof(*this); }
  57 protected:
  58   short _loop_flags;
  59   // Names for flag bitfields
  60   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,


 512   bool is_loop()    { return !_irreducible && _tail && !_tail->is_top(); }
 513   bool is_inner()   { return is_loop() && _child == NULL; }
 514   bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
 515 
 516   void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
 517 
 518 #ifndef PRODUCT
 519   void dump_head( ) const;      // Dump loop head only
 520   void dump() const;            // Dump this loop recursively
 521   void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
 522 #endif
 523 
 524 };
 525 
 526 // -----------------------------PhaseIdealLoop---------------------------------
 527 // Computes the mapping from Nodes to IdealLoopTrees.  Organizes IdealLoopTrees into a
 528 // loop tree.  Drives the loop-based transformations on the ideal graph.
 529 class PhaseIdealLoop : public PhaseTransform {
 530   friend class IdealLoopTree;
 531   friend class SuperWord;


 532   // Pre-computed def-use info
 533   PhaseIterGVN &_igvn;
 534 
 535   // Head of loop tree
 536   IdealLoopTree *_ltree_root;
 537 
 538   // Array of pre-order numbers, plus post-visited bit.
 539   // ZERO for not pre-visited.  EVEN for pre-visited but not post-visited.
 540   // ODD for post-visited.  Other bits are the pre-order number.
 541   uint *_preorders;
 542   uint _max_preorder;
 543 
 544   const PhaseIdealLoop* _verify_me;
 545   bool _verify_only;
 546 
 547   // Allocate _preorders[] array
 548   void allocate_preorders() {
 549     _max_preorder = C->unique()+8;
 550     _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder);
 551     memset(_preorders, 0, sizeof(uint) * _max_preorder);


 948   void eliminate_useless_predicates();
 949 
 950   // Change the control input of expensive nodes to allow commoning by
 951   // IGVN when it is guaranteed to not result in a more frequent
 952   // execution of the expensive node. Return true if progress.
 953   bool process_expensive_nodes();
 954 
 955   // Check whether node has become unreachable
 956   bool is_node_unreachable(Node *n) const {
 957     return !has_node(n) || n->is_unreachable(_igvn);
 958   }
 959 
 960   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
 961   void do_range_check( IdealLoopTree *loop, Node_List &old_new );
 962 
 963   // Create a slow version of the loop by cloning the loop
 964   // and inserting an if to select fast-slow versions.
 965   ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
 966                                         Node_List &old_new);
 967 










 968   // Clone loop with an invariant test (that does not exit) and
 969   // insert a clone of the test that selects which version to
 970   // execute.
 971   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
 972 
 973   // Find candidate "if" for unswitching
 974   IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
 975 
 976   // Range Check Elimination uses this function!
 977   // Constrain the main loop iterations so the affine function:
 978   //    low_limit <= scale_con * I + offset  <  upper_limit
 979   // always holds true.  That is, either increase the number of iterations in
 980   // the pre-loop or the post-loop until the condition holds true in the main
 981   // loop.  Scale_con, offset and limit are all loop invariant.
 982   void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
 983   // Helper function for add_constraint().
 984   Node* adjust_limit( int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl );
 985 
 986   // Partially peel loop up through last_peel node.
 987   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );


1099 #ifdef ASSERT
1100   void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
1101 #endif
1102 
1103 #ifndef PRODUCT
1104   void dump( ) const;
1105   void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
1106   void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
1107   void verify() const;          // Major slow  :-)
1108   void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
1109   IdealLoopTree *get_loop_idx(Node* n) const {
1110     // Dead nodes have no loop, so return the top level loop instead
1111     return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
1112   }
1113   // Print some stats
1114   static void print_statistics();
1115   static int _loop_invokes;     // Count of PhaseIdealLoop invokes
1116   static int _loop_work;        // Sum of PhaseIdealLoop x _unique
1117 #endif
1118 };






























































1119 
1120 inline Node* IdealLoopTree::tail() {
1121 // Handle lazy update of _tail field
1122   Node *n = _tail;
1123   //while( !n->in(0) )  // Skip dead CFG nodes
1124     //n = n->in(1);
1125   if (n->in(0) == NULL)
1126     n = _phase->get_ctrl(n);
1127   _tail = n;
1128   return n;
1129 }
1130 
1131 
1132 // Iterate over the loop tree using a preorder, left-to-right traversal.
1133 //
1134 // Example that visits all counted loops from within PhaseIdealLoop
1135 //
1136 //  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
1137 //   IdealLoopTree* lpt = iter.current();
1138 //   if (!lpt->is_counted()) continue;


  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
  26 #define SHARE_VM_OPTO_LOOPNODE_HPP
  27 
  28 #include "opto/cfgnode.hpp"
  29 #include "opto/multnode.hpp"
  30 #include "opto/phaseX.hpp"
  31 #include "opto/subnode.hpp"
  32 #include "opto/type.hpp"
  33 
  34 class CmpNode;
  35 class CountedLoopEndNode;
  36 class CountedLoopNode;
  37 class IdealLoopTree;
  38 class LoopNode;
  39 class Node;
  40 class PhaseIdealLoop;
  41 class CountedLoopReserveKit;
  42 class VectorSet;
  43 class Invariance;
  44 struct small_cache;
  45 
  46 //
  47 //                  I D E A L I Z E D   L O O P S
  48 //
  49 // Idealized loops are the set of loops I perform more interesting
  50 // transformations on, beyond simple hoisting.
  51 
  52 //------------------------------LoopNode---------------------------------------
  53 // Simple loop header.  Fall in path on left, loop-back path on right.
  54 class LoopNode : public RegionNode {
  55   // Size is bigger to hold the flags.  However, the flags do not change
  56   // the semantics so it does not appear in the hash & cmp functions.
  57   virtual uint size_of() const { return sizeof(*this); }
  58 protected:
  59   short _loop_flags;
  60   // Names for flag bitfields
  61   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,


 513   bool is_loop()    { return !_irreducible && _tail && !_tail->is_top(); }
 514   bool is_inner()   { return is_loop() && _child == NULL; }
 515   bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
 516 
 517   void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
 518 
 519 #ifndef PRODUCT
 520   void dump_head( ) const;      // Dump loop head only
 521   void dump() const;            // Dump this loop recursively
 522   void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
 523 #endif
 524 
 525 };
 526 
 527 // -----------------------------PhaseIdealLoop---------------------------------
 528 // Computes the mapping from Nodes to IdealLoopTrees.  Organizes IdealLoopTrees into a
 529 // loop tree.  Drives the loop-based transformations on the ideal graph.
 530 class PhaseIdealLoop : public PhaseTransform {
 531   friend class IdealLoopTree;
 532   friend class SuperWord;
 533   friend class CountedLoopReserveKit;
 534 
 535   // Pre-computed def-use info
 536   PhaseIterGVN &_igvn;
 537 
 538   // Head of loop tree
 539   IdealLoopTree *_ltree_root;
 540 
 541   // Array of pre-order numbers, plus post-visited bit.
 542   // ZERO for not pre-visited.  EVEN for pre-visited but not post-visited.
 543   // ODD for post-visited.  Other bits are the pre-order number.
 544   uint *_preorders;
 545   uint _max_preorder;
 546 
 547   const PhaseIdealLoop* _verify_me;
 548   bool _verify_only;
 549 
 550   // Allocate _preorders[] array
 551   void allocate_preorders() {
 552     _max_preorder = C->unique()+8;
 553     _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder);
 554     memset(_preorders, 0, sizeof(uint) * _max_preorder);


 951   void eliminate_useless_predicates();
 952 
 953   // Change the control input of expensive nodes to allow commoning by
 954   // IGVN when it is guaranteed to not result in a more frequent
 955   // execution of the expensive node. Return true if progress.
 956   bool process_expensive_nodes();
 957 
 958   // Check whether node has become unreachable
 959   bool is_node_unreachable(Node *n) const {
 960     return !has_node(n) || n->is_unreachable(_igvn);
 961   }
 962 
 963   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
 964   void do_range_check( IdealLoopTree *loop, Node_List &old_new );
 965 
 966   // Create a slow version of the loop by cloning the loop
 967   // and inserting an if to select fast-slow versions.
 968   ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
 969                                         Node_List &old_new);
 970 
 971   // Clone a loop and return the clone head (clone_loop_head).
 972   // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
 973   // This routine was created for usage in CountedLoopReserveKit.
 974   //
 975   //    int(1) -> If -> IfTrue -> original_loop_head
 976   //              |
 977   //              V
 978   //           IfFalse -> clone_loop_head (returned by function pointer)
 979   //
 980   LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
 981   // Clone loop with an invariant test (that does not exit) and
 982   // insert a clone of the test that selects which version to
 983   // execute.
 984   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
 985 
 986   // Find candidate "if" for unswitching
 987   IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
 988 
 989   // Range Check Elimination uses this function!
 990   // Constrain the main loop iterations so the affine function:
 991   //    low_limit <= scale_con * I + offset  <  upper_limit
 992   // always holds true.  That is, either increase the number of iterations in
 993   // the pre-loop or the post-loop until the condition holds true in the main
 994   // loop.  Scale_con, offset and limit are all loop invariant.
 995   void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
 996   // Helper function for add_constraint().
 997   Node* adjust_limit( int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl );
 998 
 999   // Partially peel loop up through last_peel node.
1000   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );


1112 #ifdef ASSERT
1113   void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
1114 #endif
1115 
1116 #ifndef PRODUCT
1117   void dump( ) const;
1118   void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
1119   void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
1120   void verify() const;          // Major slow  :-)
1121   void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
1122   IdealLoopTree *get_loop_idx(Node* n) const {
1123     // Dead nodes have no loop, so return the top level loop instead
1124     return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
1125   }
1126   // Print some stats
1127   static void print_statistics();
1128   static int _loop_invokes;     // Count of PhaseIdealLoop invokes
1129   static int _loop_work;        // Sum of PhaseIdealLoop x _unique
1130 #endif
1131 };
1132 
1133 // This kit may be used for making of a reserved copy of a loop before this loop
1134 //  goes under non-reversible changes.
1135 //
1136 // Function create_reserve() creates a reserved copy (clone) of the loop.
1137 // The reserved copy is created by calling
1138 // PhaseIdealLoop::create_reserve_version_of_loop - see there how
1139 // the original and reserved loops are connected in the outer graph.
1140 // If create_reserve succeeded, it returns 'true' and _has_reserved is set to 'true'.
1141 //
1142 // By default the reserved copy (clone) of the loop is created as dead code - it is
1143 // dominated in the outer loop by this node chain:
1144 //   intcon(1)->If->IfFalse->reserved_copy.
1145 // The original loop is dominated by the the same node chain but IfTrue projection:
1146 //   intcon(0)->If->IfTrue->original_loop.
1147 //
1148 // In this implementation of CountedLoopReserveKit the ctor includes create_reserve()
1149 // and the dtor, checks _use_new value.
1150 // If _use_new == false, it "switches" control to reserved copy of the loop
1151 // by simple replacing of node intcon(1) with node intcon(0).
1152 //
1153 // Here is a proposed example of usage (see also SuperWord::output in superword.cpp).
1154 //
1155 // void CountedLoopReserveKit_example()
1156 // {
1157 //    CountedLoopReserveKit lrk((phase, lpt, DoReserveCopy = true); // create local object
1158 //    if (DoReserveCopy && !lrk.has_reserved()) {
1159 //      return; //failed to create reserved loop copy
1160 //    }
1161 //    ...
1162 //    //something is wrong, switch to original loop
1163 ///   if(something_is_wrong) return; // ~CountedLoopReserveKit makes the switch
1164 //    ...
1165 //    //everything worked ok, return with the newly modified loop
1166 //    lrk.use_new();
1167 //    return; // ~CountedLoopReserveKit does nothing once use_new() was called
1168 //  }
1169 //
1170 // Keep in mind, that by default if create_reserve() is not followed by use_new()
1171 // the dtor will "switch to the original" loop.
1172 // NOTE. You you modify outside of the original loop this class is no help.
1173 //
1174 class CountedLoopReserveKit {
1175   private:
1176     PhaseIdealLoop* _phase;
1177     IdealLoopTree*  _lpt;
1178     LoopNode*       _lp;
1179     IfNode*         _iff;
1180     LoopNode*       _lp_reserved;
1181     bool            _has_reserved;
1182     bool            _use_new;
1183     const bool      _active; //may be set to false in ctor, then the object is dummy
1184 
1185   public:
1186     CountedLoopReserveKit(PhaseIdealLoop* phase, IdealLoopTree *loop, bool active);
1187     ~CountedLoopReserveKit();
1188     void use_new()                {_use_new = true;}
1189     void set_iff(IfNode* x)       {_iff = x;}
1190     bool has_reserved()     const { return _active && _has_reserved;}
1191   private:
1192     bool create_reserve();
1193 };// class CountedLoopReserveKit
1194 
1195 inline Node* IdealLoopTree::tail() {
1196 // Handle lazy update of _tail field
1197   Node *n = _tail;
1198   //while( !n->in(0) )  // Skip dead CFG nodes
1199     //n = n->in(1);
1200   if (n->in(0) == NULL)
1201     n = _phase->get_ctrl(n);
1202   _tail = n;
1203   return n;
1204 }
1205 
1206 
1207 // Iterate over the loop tree using a preorder, left-to-right traversal.
1208 //
1209 // Example that visits all counted loops from within PhaseIdealLoop
1210 //
1211 //  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
1212 //   IdealLoopTree* lpt = iter.current();
1213 //   if (!lpt->is_counted()) continue;
< prev index next >