20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
26 #define SHARE_VM_OPTO_LOOPNODE_HPP
27
28 #include "opto/cfgnode.hpp"
29 #include "opto/multnode.hpp"
30 #include "opto/phaseX.hpp"
31 #include "opto/subnode.hpp"
32 #include "opto/type.hpp"
33
34 class CmpNode;
35 class CountedLoopEndNode;
36 class CountedLoopNode;
37 class IdealLoopTree;
38 class LoopNode;
39 class Node;
40 class PhaseIdealLoop;
41 class CountedLoopReserveKit;
42 class VectorSet;
43 class Invariance;
44 struct small_cache;
45
46 //
47 // I D E A L I Z E D L O O P S
48 //
49 // Idealized loops are the set of loops I perform more interesting
50 // transformations on, beyond simple hoisting.
51
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
class LoopNode : public RegionNode {
  // Size is bigger to hold the flags. However, the flags do not change
  // the semantics so it does not appear in the hash & cmp functions.
  virtual uint size_of() const { return sizeof(*this); }
protected:
  // Bit set built from the flag values in the enum below.
  short _loop_flags;
  // Names for flag bitfields
  enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
         MainHasNoPreLoop=4,
         HasExactTripCount=8,
         InnerLoop=16,
         PartialPeelLoop=32,
         PartialPeelFailed=64,
         HasReductions=128,
         WasSlpAnalyzed=256,
         PassedSlpAnalysis=512,
         DoUnrollOnly=1024,
         VectorizedLoop=2048,
         HasAtomicPostLoop=4096,
         HasRangeChecks=8192,
         IsMultiversioned=16384};
  // Number of times this loop has been unswitched; bounded by _unswitch_max
  // (enforced in set_unswitch_count below).
  char _unswitch_count;
  enum { _unswitch_max=3 };
  // State of post-loop range-check-elimination multiversioning; holds the
  // LoopNotRCEChecked/LoopRCEChecked/RCEPostLoop values from the enum below.
  char _postloop_flags;
  enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };

public:
  // Names for edge indices
  enum { Self=0, EntryControl, LoopBackControl };

  // Flag queries return the masked bit (non-zero when the flag is set);
  // the set_/mark_ methods OR the corresponding bit into _loop_flags.
  int is_inner_loop() const { return _loop_flags & InnerLoop; }
  void set_inner_loop() { _loop_flags |= InnerLoop; }

  int range_checks_present() const { return _loop_flags & HasRangeChecks; }
  int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }

  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_has_reductions() { _loop_flags |= HasReductions; }
  void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
  void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
  void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
  void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }

  // Accessors for the post-loop RCE multiversioning state (_postloop_flags).
  int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
  void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
  int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
  void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }

  void set_unswitch_count(int val) {
    assert (val <= unswitch_max(), "too many unswitches");
    _unswitch_count = val;
  }

  // A loop is a Region with exactly one entry control and one backedge
  // control; all flag state starts cleared.
  LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
    init_class_id(Class_Loop);
    init_req(EntryControl, entry);
    init_req(LoopBackControl, backedge);
  }

  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual int Opcode() const;
  // True if this region still has the canonical 2-way loop shape (self,
  // entry, backedge) and both control inputs are live (not TOP), so an
  // attempt can be made to convert it into a counted loop.
  bool can_be_counted_loop(PhaseTransform* phase) const {
    return req() == 3 && in(0) != NULL &&
      in(1) != NULL && phase->type(in(1)) != Type::TOP &&
      in(2) != NULL && phase->type(in(2)) != Type::TOP;
  }
  bool is_valid_counted_loop() const;
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};
135
136 //------------------------------Counted Loops----------------------------------
137 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
138 // path (and maybe some other exit paths). The trip-counter exit is always
139 // last in the loop. The trip-counter has to stride by a constant;
140 // the exit value is also loop invariant.
141
142 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
143 // CountedLoopNode has the incoming loop control and the loop-back-control
144 // which is always the IfTrue before the matching CountedLoopEndNode. The
145 // CountedLoopEndNode has an incoming control (possibly not the
146 // CountedLoopNode if there is control flow in the loop), the post-increment
147 // trip-counter value, and the limit. The trip-counter value is always of
148 // the form (Op old-trip-counter stride). The old-trip-counter is produced
149 // by a Phi connected to the CountedLoopNode. The stride is constant.
150 // The Op is any commutable opcode, including Add, Mul, Xor. The
151 // CountedLoopEndNode also takes in the loop-invariant limit value.
152
153 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
215 // Match increment with optional truncation
216 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
217
218 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
219 // can run short a few iterations and may start a few iterations in.
220 // It will be RCE'd and unrolled and aligned.
221
222 // A following 'post' loop will run any remaining iterations. Used
223 // during Range Check Elimination, the 'post' loop will do any final
224 // iterations with full checks. Also used by Loop Unrolling, where
225 // the 'post' loop will do any epilog iterations needed. Basically,
226 // a 'post' loop can not profitably be further unrolled or RCE'd.
227
228 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
229 // it may do under-flow checks for RCE and may do alignment iterations
230 // so the following main loop 'knows' that it is striding down cache
231 // lines.
232
233 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
234 // Aligned, may be missing its pre-loop.
235 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
236 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
237 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
238 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
239 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
240 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
241 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
242 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
243 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
244 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
245 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
246
247 int main_idx() const { return _main_idx; }
248
249
250 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
251 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
252 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
253 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
254
255 void set_trip_count(uint tc) { _trip_count = tc; }
256 uint trip_count() { return _trip_count; }
257
258 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
259 void set_exact_trip_count(uint tc) {
260 _trip_count = tc;
261 _loop_flags |= HasExactTripCount;
262 }
263 void set_nonexact_trip_count() {
264 _loop_flags &= ~HasExactTripCount;
265 }
266 void set_notpassed_slp() {
267 _loop_flags &= ~PassedSlpAnalysis;
268 }
269
270 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
271 float profile_trip_cnt() { return _profile_trip_cnt; }
272
273 void double_unrolled_count() { _unrolled_count_log2++; }
274 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
275
276 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
277 int node_count_before_unroll() { return _node_count_before_unroll; }
278 void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
279 int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
280
281 #ifndef PRODUCT
282 virtual void dump_spec(outputStream *st) const;
283 #endif
284 };
285
286 //------------------------------CountedLoopEndNode-----------------------------
287 // CountedLoopEndNodes end simple trip counted loops. They act much like
288 // IfNodes.
289 class CountedLoopEndNode : public IfNode {
290 public:
291 enum { TestControl, TestValue };
292
293 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
294 : IfNode( control, test, prob, cnt) {
295 init_class_id(Class_CountedLoopEnd);
296 }
297 virtual int Opcode() const;
298
299 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
300 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
763 _nodes.map(old_node->_idx, (Node*)((intptr_t)new_node + 1));
764 }
  // Replace 'old_node' by 'new_node' in the graph via IGVN, and also record
  // the old->new mapping (see lazy_update above) so stale cached references
  // to 'old_node' (e.g. in the idom array) can be repaired lazily.
  void lazy_replace(Node *old_node, Node *new_node) {
    _igvn.replace_node(old_node, new_node);
    lazy_update(old_node, new_node);
  }
769
770 private:
771
772 // Place 'n' in some loop nest, where 'n' is a CFG node
773 void build_loop_tree();
774 int build_loop_tree_impl( Node *n, int pre_order );
775 // Insert loop into the existing loop tree. 'innermost' is a leaf of the
776 // loop tree, not the root.
777 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
778
779 // Place Data nodes in some loop nest
780 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
781 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
782 void build_loop_late_post ( Node* n );
783
784 // Array of immediate dominance info for each CFG node indexed by node idx
785 private:
786 uint _idom_size;
787 Node **_idom; // Array of immediate dominators
788 uint *_dom_depth; // Used for fast LCA test
789 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
790
  // Return the immediate dominator of CFG node 'd' without repairing the
  // _idom array in place. Dead entries (in(0) == NULL) are skipped by
  // following the lazy-update forwarding stored in _nodes: lazy_update tags
  // the replacement pointer by setting its low bit, hence the '& ~1' here.
  Node* idom_no_update(Node* d) const {
    assert(d->_idx < _idom_size, "oob");
    Node* n = _idom[d->_idx];
    assert(n != NULL,"Bad immediate dominator info.");
    while (n->in(0) == NULL) { // Skip dead CFG nodes
      //n = n->in(1);
      n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
      assert(n != NULL,"Bad immediate dominator info.");
    }
    return n;
  }
802 Node *idom(Node* d) const {
860 _dom_lca_tags(arena()), // Thread::resource_area
861 _verify_me(verify_me),
862 _verify_only(false) {
863 build_and_optimize(false, false);
864 }
865
866 // Build and verify the loop tree without modifying the graph. This
867 // is useful to verify that all inputs properly dominate their uses.
  static void verify(PhaseIterGVN& igvn) {
#ifdef ASSERT
    // Constructing this PhaseIdealLoop performs the verification as a
    // side effect; the object itself is unused. No-op in product builds.
    PhaseIdealLoop v(igvn);
#endif
  }
873
874 // True if the method has at least 1 irreducible loop
875 bool _has_irreducible_loops;
876
877 // Per-Node transform
878 virtual Node *transform( Node *a_node ) { return 0; }
879
880 bool is_counted_loop( Node *x, IdealLoopTree *loop );
881
882 Node* exact_limit( IdealLoopTree *loop );
883
884 // Return a post-walked LoopNode
  // Map CFG node 'n' to its enclosing IdealLoopTree. Only valid for CFG
  // nodes: for them _nodes[] stores an IdealLoopTree*, whereas data nodes
  // (has_ctrl) store a tagged control pointer instead -- hence the assert.
  IdealLoopTree *get_loop( Node *n ) const {
    // Dead nodes have no loop, so return the top level loop instead
    if (!has_node(n)) return _ltree_root;
    assert(!has_ctrl(n), "");
    return (IdealLoopTree*)_nodes[n->_idx];
  }
891
892 // Is 'n' a (nested) member of 'loop'?
  // Nested membership is delegated to IdealLoopTree::is_member, so a node
  // in an inner loop also counts as a member of every enclosing loop.
  int is_member( const IdealLoopTree *loop, Node *n ) const {
    return loop->is_member(get_loop(n)); }
895
896 // This is the basic building block of the loop optimizations. It clones an
897 // entire loop body. It makes an old_new loop body mapping; with this
898 // mapping you can find the new-loop equivalent to an old-loop node. All
899 // new-loop nodes are exactly equal to their old-loop counterparts, all
900 // edges are the same. All exits from the old-loop now have a RegionNode
901 // that merges the equivalent new-loop path. This is true even for the
902 // normal "loop-exit" condition. All uses of loop-invariant old-loop values
903 // now come from (one or more) Phis that merge their new-loop equivalents.
904 // Parameter side_by_side_idom:
905 // When side_by_side_idom is NULL, the dominator tree is constructed for
906 // the clone loop to dominate the original. Used in construction of
907 // pre-main-post loop sequence.
908 // When nonnull, the clone and original are side-by-side, both are
909 // dominated by the passed in side_by_side_idom node. Used in
910 // construction of unswitched loops.
911 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
912 Node* side_by_side_idom = NULL);
913
914 // If we got the effect of peeling, either by actually peeling or by
915 // making a pre-loop which must execute at least once, we can remove
916 // all loop-invariant dominated tests in the main body.
917 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
918
919 // Generate code to do a loop peel for the given loop (and body).
920 // old_new is a temp array.
921 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
922
923 // Add pre and post loops around the given loop. These loops are used
924 // during RCE, unrolling and aligning loops.
925 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
926
927 // Add post loop after the given loop.
928 Node *insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
929 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
930 Node *incr, Node *limit, CountedLoopNode *&post_head);
931
932 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
1003 return !has_node(n) || n->is_unreachable(_igvn);
1004 }
1005
1006 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1007 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1008
1009 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1010 void has_range_checks(IdealLoopTree *loop);
1011
1012 // Process post loops which have range checks and try to build a multi-version
1013 // guard to safely determine if we can execute the post loop which was RCE'd.
1014 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1015
1016 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1017 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1018
1019 // Create a slow version of the loop by cloning the loop
1020 // and inserting an if to select fast-slow versions.
1021 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1022 Node_List &old_new,
1023 int opcode);
1024
1025 // Clone a loop and return the clone head (clone_loop_head).
1026 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1027 // This routine was created for usage in CountedLoopReserveKit.
1028 //
1029 // int(1) -> If -> IfTrue -> original_loop_head
1030 // |
1031 // V
1032 // IfFalse -> clone_loop_head (returned by function pointer)
1033 //
1034 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1035 // Clone loop with an invariant test (that does not exit) and
1036 // insert a clone of the test that selects which version to
1037 // execute.
1038 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1039
1040 // Find candidate "if" for unswitching
1041 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1042
1043 // Range Check Elimination uses this function!
|
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
26 #define SHARE_VM_OPTO_LOOPNODE_HPP
27
28 #include "opto/cfgnode.hpp"
29 #include "opto/multnode.hpp"
30 #include "opto/phaseX.hpp"
31 #include "opto/subnode.hpp"
32 #include "opto/type.hpp"
33
34 class CmpNode;
35 class CountedLoopEndNode;
36 class CountedLoopNode;
37 class IdealLoopTree;
38 class LoopNode;
39 class Node;
40 class Opaque5Node;
41 class PhaseIdealLoop;
42 class CountedLoopReserveKit;
43 class VectorSet;
44 class Invariance;
45 struct small_cache;
46
47 //
48 // I D E A L I Z E D L O O P S
49 //
50 // Idealized loops are the set of loops I perform more interesting
51 // transformations on, beyond simple hoisting.
52
53 //------------------------------LoopNode---------------------------------------
54 // Simple loop header. Fall in path on left, loop-back path on right.
55 class LoopNode : public RegionNode {
56 // Size is bigger to hold the flags. However, the flags do not change
57 // the semantics so it does not appear in the hash & cmp functions.
58 virtual uint size_of() const { return sizeof(*this); }
59 protected:
60 short _loop_flags;
61 // Names for flag bitfields
62 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
63 MainHasNoPreLoop=4,
64 HasExactTripCount=8,
65 InnerLoop=16,
66 PartialPeelLoop=32,
67 PartialPeelFailed=64,
68 HasReductions=128,
69 WasSlpAnalyzed=256,
70 PassedSlpAnalysis=512,
71 DoUnrollOnly=1024,
72 VectorizedLoop=2048,
73 HasAtomicPostLoop=4096,
74 HasRangeChecks=8192,
75 IsMultiversioned=16384,
76 StripMined=32768};
77 char _unswitch_count;
78 enum { _unswitch_max=3 };
79 char _postloop_flags;
80 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
81
82 public:
83 // Names for edge indices
84 enum { Self=0, EntryControl, LoopBackControl };
85
86 uint is_inner_loop() const { return _loop_flags & InnerLoop; }
87 void set_inner_loop() { _loop_flags |= InnerLoop; }
88
89 uint range_checks_present() const { return _loop_flags & HasRangeChecks; }
90 uint is_multiversioned() const { return _loop_flags & IsMultiversioned; }
91 uint is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
92 uint is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
93 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
94 uint partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
95 uint is_strip_mined() const { return _loop_flags & StripMined; }
96
97 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
98 void mark_has_reductions() { _loop_flags |= HasReductions; }
99 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
100 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
101 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
102 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
103 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
104 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
105 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
106 void mark_strip_mined() { _loop_flags |= StripMined; }
107 void clear_strip_mined() { _loop_flags &= ~StripMined; }
108
109 int unswitch_max() { return _unswitch_max; }
110 int unswitch_count() { return _unswitch_count; }
111
112 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
113 void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
114 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
115 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
116
117 void set_unswitch_count(int val) {
118 assert (val <= unswitch_max(), "too many unswitches");
119 _unswitch_count = val;
120 }
121
122 LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
123 init_class_id(Class_Loop);
124 init_req(EntryControl, entry);
125 init_req(LoopBackControl, backedge);
126 }
127
128 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
129 virtual int Opcode() const;
130 bool can_be_counted_loop(PhaseTransform* phase) const {
131 return req() == 3 && in(0) != NULL &&
132 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
133 in(2) != NULL && phase->type(in(2)) != Type::TOP;
134 }
135 bool is_valid_counted_loop() const;
136 #ifndef PRODUCT
137 virtual void dump_spec(outputStream *st) const;
138 #endif
139
140 void verify_strip_mined(int expect_opaq) const;
141 virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; }
142 virtual IfTrueNode* outer_loop_tail() const;
143 virtual IfNode* outer_loop_end() const;
144 virtual IfFalseNode* outer_loop_exit() const;
145 virtual SafePointNode* outer_safepoint() const;
146 virtual BoolNode* outer_bol() const;
147 virtual CmpINode* outer_cmp() const;
148 virtual Opaque5Node* outer_opaq() const;
149 };
150
151 //------------------------------Counted Loops----------------------------------
152 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
153 // path (and maybe some other exit paths). The trip-counter exit is always
154 // last in the loop. The trip-counter has to stride by a constant;
155 // the exit value is also loop invariant.
156
157 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
158 // CountedLoopNode has the incoming loop control and the loop-back-control
159 // which is always the IfTrue before the matching CountedLoopEndNode. The
160 // CountedLoopEndNode has an incoming control (possibly not the
161 // CountedLoopNode if there is control flow in the loop), the post-increment
162 // trip-counter value, and the limit. The trip-counter value is always of
163 // the form (Op old-trip-counter stride). The old-trip-counter is produced
164 // by a Phi connected to the CountedLoopNode. The stride is constant.
165 // The Op is any commutable opcode, including Add, Mul, Xor. The
166 // CountedLoopEndNode also takes in the loop-invariant limit value.
167
168 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
230 // Match increment with optional truncation
231 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
232
233 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
234 // can run short a few iterations and may start a few iterations in.
235 // It will be RCE'd and unrolled and aligned.
236
237 // A following 'post' loop will run any remaining iterations. Used
238 // during Range Check Elimination, the 'post' loop will do any final
239 // iterations with full checks. Also used by Loop Unrolling, where
240 // the 'post' loop will do any epilog iterations needed. Basically,
241 // a 'post' loop can not profitably be further unrolled or RCE'd.
242
243 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
244 // it may do under-flow checks for RCE and may do alignment iterations
245 // so the following main loop 'knows' that it is striding down cache
246 // lines.
247
248 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
249 // Aligned, may be missing its pre-loop.
250 uint is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
251 uint is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
252 uint is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
253 uint is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
254 uint is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
255 uint was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
256 uint has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
257 uint do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
258 uint is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
259 uint has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
260 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
261
262 int main_idx() const { return _main_idx; }
263
264
265 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
266 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
267 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
268 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
269
270 void set_trip_count(uint tc) { _trip_count = tc; }
271 uint trip_count() { return _trip_count; }
272
273 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
274 void set_exact_trip_count(uint tc) {
275 _trip_count = tc;
276 _loop_flags |= HasExactTripCount;
277 }
278 void set_nonexact_trip_count() {
279 _loop_flags &= ~HasExactTripCount;
280 }
281 void set_notpassed_slp() {
282 _loop_flags &= ~PassedSlpAnalysis;
283 }
284
285 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
286 float profile_trip_cnt() { return _profile_trip_cnt; }
287
288 void double_unrolled_count() { _unrolled_count_log2++; }
289 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
290
291 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
292 int node_count_before_unroll() { return _node_count_before_unroll; }
293 void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
294 int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
295
296 virtual LoopNode* skip_strip_mined(int expect_opaq = 1);
297 LoopNode* outer_loop() const;
298 virtual IfTrueNode* outer_loop_tail() const;
299 virtual IfNode* outer_loop_end() const;
300 virtual IfFalseNode* outer_loop_exit() const;
301 virtual SafePointNode* outer_safepoint() const;
302 virtual BoolNode* outer_bol() const;
303 virtual CmpINode* outer_cmp() const;
304 virtual Opaque5Node* outer_opaq() const;
305
306 #ifndef PRODUCT
307 virtual void dump_spec(outputStream *st) const;
308 #endif
309 };
310
311 //------------------------------CountedLoopEndNode-----------------------------
312 // CountedLoopEndNodes end simple trip counted loops. They act much like
313 // IfNodes.
314 class CountedLoopEndNode : public IfNode {
315 public:
316 enum { TestControl, TestValue };
317
318 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
319 : IfNode( control, test, prob, cnt) {
320 init_class_id(Class_CountedLoopEnd);
321 }
322 virtual int Opcode() const;
323
324 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
325 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
788 _nodes.map(old_node->_idx, (Node*)((intptr_t)new_node + 1));
789 }
  // Replace 'old_node' by 'new_node' in the graph via IGVN, and also record
  // the old->new mapping (see lazy_update above) so stale cached references
  // to 'old_node' (e.g. in the idom array) can be repaired lazily.
  void lazy_replace(Node *old_node, Node *new_node) {
    _igvn.replace_node(old_node, new_node);
    lazy_update(old_node, new_node);
  }
794
795 private:
796
797 // Place 'n' in some loop nest, where 'n' is a CFG node
798 void build_loop_tree();
799 int build_loop_tree_impl( Node *n, int pre_order );
800 // Insert loop into the existing loop tree. 'innermost' is a leaf of the
801 // loop tree, not the root.
802 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
803
804 // Place Data nodes in some loop nest
805 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
806 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
807 void build_loop_late_post ( Node* n );
808 void verify_strip_mined_scheduling(Node *n, Node* least);
809
810 // Array of immediate dominance info for each CFG node indexed by node idx
811 private:
812 uint _idom_size;
813 Node **_idom; // Array of immediate dominators
814 uint *_dom_depth; // Used for fast LCA test
815 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
816
  // Return the immediate dominator of CFG node 'd' without repairing the
  // _idom array in place. Dead entries (in(0) == NULL) are skipped by
  // following the lazy-update forwarding stored in _nodes: lazy_update tags
  // the replacement pointer by setting its low bit, hence the '& ~1' here.
  Node* idom_no_update(Node* d) const {
    assert(d->_idx < _idom_size, "oob");
    Node* n = _idom[d->_idx];
    assert(n != NULL,"Bad immediate dominator info.");
    while (n->in(0) == NULL) { // Skip dead CFG nodes
      //n = n->in(1);
      n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
      assert(n != NULL,"Bad immediate dominator info.");
    }
    return n;
  }
828 Node *idom(Node* d) const {
886 _dom_lca_tags(arena()), // Thread::resource_area
887 _verify_me(verify_me),
888 _verify_only(false) {
889 build_and_optimize(false, false);
890 }
891
892 // Build and verify the loop tree without modifying the graph. This
893 // is useful to verify that all inputs properly dominate their uses.
  static void verify(PhaseIterGVN& igvn) {
#ifdef ASSERT
    // Constructing this PhaseIdealLoop performs the verification as a
    // side effect; the object itself is unused. No-op in product builds.
    PhaseIdealLoop v(igvn);
#endif
  }
899
900 // True if the method has at least 1 irreducible loop
901 bool _has_irreducible_loops;
902
903 // Per-Node transform
904 virtual Node *transform( Node *a_node ) { return 0; }
905
906 bool is_counted_loop(Node* x, IdealLoopTree*& loop);
907 IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
908 IdealLoopTree* loop, float cl_prob, float le_fcnt,
909 Node*& entry_control, Node*& iffalse);
910
911 Node* exact_limit( IdealLoopTree *loop );
912
913 // Return a post-walked LoopNode
  // Map CFG node 'n' to its enclosing IdealLoopTree. Only valid for CFG
  // nodes: for them _nodes[] stores an IdealLoopTree*, whereas data nodes
  // (has_ctrl) store a tagged control pointer instead -- hence the assert.
  IdealLoopTree *get_loop( Node *n ) const {
    // Dead nodes have no loop, so return the top level loop instead
    if (!has_node(n)) return _ltree_root;
    assert(!has_ctrl(n), "");
    return (IdealLoopTree*)_nodes[n->_idx];
  }
920
921 // Is 'n' a (nested) member of 'loop'?
  // Nested membership is delegated to IdealLoopTree::is_member, so a node
  // in an inner loop also counts as a member of every enclosing loop.
  int is_member( const IdealLoopTree *loop, Node *n ) const {
    return loop->is_member(get_loop(n)); }
924
925 // This is the basic building block of the loop optimizations. It clones an
926 // entire loop body. It makes an old_new loop body mapping; with this
927 // mapping you can find the new-loop equivalent to an old-loop node. All
928 // new-loop nodes are exactly equal to their old-loop counterparts, all
929 // edges are the same. All exits from the old-loop now have a RegionNode
930 // that merges the equivalent new-loop path. This is true even for the
931 // normal "loop-exit" condition. All uses of loop-invariant old-loop values
932 // now come from (one or more) Phis that merge their new-loop equivalents.
933 // Parameter side_by_side_idom:
934 // When side_by_side_idom is NULL, the dominator tree is constructed for
935 // the clone loop to dominate the original. Used in construction of
936 // pre-main-post loop sequence.
937 // When nonnull, the clone and original are side-by-side, both are
938 // dominated by the passed in side_by_side_idom node. Used in
939 // construction of unswitched loops.
940 enum CloneLoopMode {
941 IgnoreStripMined = 0, // Only clone inner strip mined loop
942 CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops
943 ControlAroundStripMined = 2 // Only clone inner strip mined loop,
944 // result control flow branches
945 // either to inner clone or outer
946 // strip mined loop.
947 };
948 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
949 CloneLoopMode mode, Node* side_by_side_idom = NULL);
950 void clone_loop_handle_data_uses(Node* old, Node_List &old_new,
951 IdealLoopTree* loop, IdealLoopTree* companion_loop,
952 Node_List*& split_if_set, Node_List*& split_bool_set,
953 Node_List*& split_cex_set, Node_List& worklist,
954 uint new_counter, CloneLoopMode mode);
955 void clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
956 IdealLoopTree* outer_loop, int dd, Node_List &old_new,
957 Node_List& extra_data_nodes);
958
959 // If we got the effect of peeling, either by actually peeling or by
960 // making a pre-loop which must execute at least once, we can remove
961 // all loop-invariant dominated tests in the main body.
962 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
963
964 // Generate code to do a loop peel for the given loop (and body).
965 // old_new is a temp array.
966 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
967
968 // Add pre and post loops around the given loop. These loops are used
969 // during RCE, unrolling and aligning loops.
970 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
971
972 // Add post loop after the given loop.
973 Node *insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
974 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
975 Node *incr, Node *limit, CountedLoopNode *&post_head);
976
977 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
1048 return !has_node(n) || n->is_unreachable(_igvn);
1049 }
1050
1051 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1052 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1053
1054 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1055 void has_range_checks(IdealLoopTree *loop);
1056
1057 // Process post loops which have range checks and try to build a multi-version
1058 // guard to safely determine if we can execute the post loop which was RCE'd.
1059 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1060
1061 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1062 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1063
1064 // Create a slow version of the loop by cloning the loop
1065 // and inserting an if to select fast-slow versions.
1066 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1067 Node_List &old_new,
1068 int opcode,
1069 CloneLoopMode mode);
1070
1071 // Clone a loop and return the clone head (clone_loop_head).
1072 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1073 // This routine was created for usage in CountedLoopReserveKit.
1074 //
1075 // int(1) -> If -> IfTrue -> original_loop_head
1076 // |
1077 // V
1078 // IfFalse -> clone_loop_head (returned by function pointer)
1079 //
1080 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1081 // Clone loop with an invariant test (that does not exit) and
1082 // insert a clone of the test that selects which version to
1083 // execute.
1084 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1085
1086 // Find candidate "if" for unswitching
1087 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1088
1089 // Range Check Elimination uses this function!
|