20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
26 #define SHARE_VM_OPTO_LOOPNODE_HPP
27
28 #include "opto/cfgnode.hpp"
29 #include "opto/multnode.hpp"
30 #include "opto/phaseX.hpp"
31 #include "opto/subnode.hpp"
32 #include "opto/type.hpp"
33
34 class CmpNode;
35 class CountedLoopEndNode;
36 class CountedLoopNode;
37 class IdealLoopTree;
38 class LoopNode;
39 class Node;
40 class PhaseIdealLoop;
41 class CountedLoopReserveKit;
42 class VectorSet;
43 class Invariance;
44 struct small_cache;
45
46 //
47 // I D E A L I Z E D L O O P S
48 //
49 // Idealized loops are the set of loops I perform more interesting
50 // transformations on, beyond simple hoisting.
51
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
class LoopNode : public RegionNode {
  // Size is bigger to hold the flags. However, the flags do not change
  // the semantics so it does not appear in the hash & cmp functions.
  virtual uint size_of() const { return sizeof(*this); }
protected:
  // Bit set built from the flag values in the enum below.
  short _loop_flags;
  // Names for flag bitfields
  enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
         MainHasNoPreLoop=4,
         HasExactTripCount=8,
         InnerLoop=16,
         PartialPeelLoop=32,
         PartialPeelFailed=64,
         HasReductions=128,
         WasSlpAnalyzed=256,
         PassedSlpAnalysis=512,
         DoUnrollOnly=1024,
         VectorizedLoop=2048,
         HasAtomicPostLoop=4096,
         HasRangeChecks=8192,
         IsMultiversioned=16384};
  // Number of times this loop has been unswitched; bounded by _unswitch_max
  // (enforced in set_unswitch_count below).
  char _unswitch_count;
  enum { _unswitch_max=3 };
  // State of post-loop range-check-elimination multiversioning; holds the
  // LoopNotRCEChecked/LoopRCEChecked/RCEPostLoop values from the enum below.
  char _postloop_flags;
  enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };

public:
  // Names for edge indices
  enum { Self=0, EntryControl, LoopBackControl };

  // Flag queries return the masked bit (non-zero when the flag is set);
  // the set_/mark_ methods OR the corresponding bit into _loop_flags.
  int is_inner_loop() const { return _loop_flags & InnerLoop; }
  void set_inner_loop() { _loop_flags |= InnerLoop; }

  int range_checks_present() const { return _loop_flags & HasRangeChecks; }
  int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }

  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_has_reductions() { _loop_flags |= HasReductions; }
  void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
  void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
  void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
  void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }

  // Accessors for the post-loop RCE multiversioning state (_postloop_flags).
  int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
  void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
  int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
  void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }

  void set_unswitch_count(int val) {
    assert (val <= unswitch_max(), "too many unswitches");
    _unswitch_count = val;
  }

  // A loop is a Region with exactly one entry control and one backedge
  // control; all flag state starts cleared.
  LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
    init_class_id(Class_Loop);
    init_req(EntryControl, entry);
    init_req(LoopBackControl, backedge);
  }

  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual int Opcode() const;
  // True if this region still has the canonical 2-way loop shape (self,
  // entry, backedge) and both control inputs are live (not TOP), so an
  // attempt can be made to convert it into a counted loop.
  bool can_be_counted_loop(PhaseTransform* phase) const {
    return req() == 3 && in(0) != NULL &&
      in(1) != NULL && phase->type(in(1)) != Type::TOP &&
      in(2) != NULL && phase->type(in(2)) != Type::TOP;
  }
  bool is_valid_counted_loop() const;
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};
135
136 //------------------------------Counted Loops----------------------------------
137 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
138 // path (and maybe some other exit paths). The trip-counter exit is always
139 // last in the loop. The trip-counter has to stride by a constant;
140 // the exit value is also loop invariant.
141
142 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
143 // CountedLoopNode has the incoming loop control and the loop-back-control
144 // which is always the IfTrue before the matching CountedLoopEndNode. The
145 // CountedLoopEndNode has an incoming control (possibly not the
146 // CountedLoopNode if there is control flow in the loop), the post-increment
147 // trip-counter value, and the limit. The trip-counter value is always of
148 // the form (Op old-trip-counter stride). The old-trip-counter is produced
149 // by a Phi connected to the CountedLoopNode. The stride is constant.
150 // The Op is any commutable opcode, including Add, Mul, Xor. The
151 // CountedLoopEndNode also takes in the loop-invariant limit value.
152
153 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
215 // Match increment with optional truncation
216 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
217
218 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
219 // can run short a few iterations and may start a few iterations in.
220 // It will be RCE'd and unrolled and aligned.
221
222 // A following 'post' loop will run any remaining iterations. Used
223 // during Range Check Elimination, the 'post' loop will do any final
224 // iterations with full checks. Also used by Loop Unrolling, where
225 // the 'post' loop will do any epilog iterations needed. Basically,
226 // a 'post' loop can not profitably be further unrolled or RCE'd.
227
228 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
229 // it may do under-flow checks for RCE and may do alignment iterations
230 // so the following main loop 'knows' that it is striding down cache
231 // lines.
232
233 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
234 // Aligned, may be missing its pre-loop.
235 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
236 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
237 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
238 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
239 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
240 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
241 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
242 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
243 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
244 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
245 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
246
247 int main_idx() const { return _main_idx; }
248
249
250 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
251 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
252 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
253 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
254
255 void set_trip_count(uint tc) { _trip_count = tc; }
256 uint trip_count() { return _trip_count; }
257
258 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
259 void set_exact_trip_count(uint tc) {
260 _trip_count = tc;
261 _loop_flags |= HasExactTripCount;
262 }
263 void set_nonexact_trip_count() {
264 _loop_flags &= ~HasExactTripCount;
265 }
266 void set_notpassed_slp() {
267 _loop_flags &= ~PassedSlpAnalysis;
268 }
269
270 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
271 float profile_trip_cnt() { return _profile_trip_cnt; }
272
273 void double_unrolled_count() { _unrolled_count_log2++; }
274 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
275
276 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
277 int node_count_before_unroll() { return _node_count_before_unroll; }
278 void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
279 int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
280
281 #ifndef PRODUCT
282 virtual void dump_spec(outputStream *st) const;
283 #endif
284 };
285
286 //------------------------------CountedLoopEndNode-----------------------------
287 // CountedLoopEndNodes end simple trip counted loops. They act much like
288 // IfNodes.
289 class CountedLoopEndNode : public IfNode {
290 public:
291 enum { TestControl, TestValue };
292
293 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
294 : IfNode( control, test, prob, cnt) {
295 init_class_id(Class_CountedLoopEnd);
296 }
297 virtual int Opcode() const;
298
299 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
300 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
763 _nodes.map(old_node->_idx, (Node*)((intptr_t)new_node + 1));
764 }
  // Replace 'old_node' by 'new_node' in the graph via IGVN, and also record
  // the old->new mapping (see lazy_update above) so stale cached references
  // to 'old_node' (e.g. in the idom array) can be repaired lazily.
  void lazy_replace(Node *old_node, Node *new_node) {
    _igvn.replace_node(old_node, new_node);
    lazy_update(old_node, new_node);
  }
769
770 private:
771
772 // Place 'n' in some loop nest, where 'n' is a CFG node
773 void build_loop_tree();
774 int build_loop_tree_impl( Node *n, int pre_order );
775 // Insert loop into the existing loop tree. 'innermost' is a leaf of the
776 // loop tree, not the root.
777 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
778
779 // Place Data nodes in some loop nest
780 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
781 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
782 void build_loop_late_post ( Node* n );
783
784 // Array of immediate dominance info for each CFG node indexed by node idx
785 private:
786 uint _idom_size;
787 Node **_idom; // Array of immediate dominators
788 uint *_dom_depth; // Used for fast LCA test
789 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
790
  // Return the immediate dominator of CFG node 'd' without repairing the
  // _idom array in place. Dead entries (in(0) == NULL) are skipped by
  // following the lazy-update forwarding stored in _nodes: lazy_update tags
  // the replacement pointer by setting its low bit, hence the '& ~1' here.
  Node* idom_no_update(Node* d) const {
    assert(d->_idx < _idom_size, "oob");
    Node* n = _idom[d->_idx];
    assert(n != NULL,"Bad immediate dominator info.");
    while (n->in(0) == NULL) { // Skip dead CFG nodes
      //n = n->in(1);
      n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
      assert(n != NULL,"Bad immediate dominator info.");
    }
    return n;
  }
802 Node *idom(Node* d) const {
860 _dom_lca_tags(arena()), // Thread::resource_area
861 _verify_me(verify_me),
862 _verify_only(false) {
863 build_and_optimize(false, false);
864 }
865
866 // Build and verify the loop tree without modifying the graph. This
867 // is useful to verify that all inputs properly dominate their uses.
  static void verify(PhaseIterGVN& igvn) {
#ifdef ASSERT
    // Constructing this PhaseIdealLoop performs the verification as a
    // side effect; the object itself is unused. No-op in product builds.
    PhaseIdealLoop v(igvn);
#endif
  }
873
874 // True if the method has at least 1 irreducible loop
875 bool _has_irreducible_loops;
876
877 // Per-Node transform
878 virtual Node *transform( Node *a_node ) { return 0; }
879
880 bool is_counted_loop( Node *x, IdealLoopTree *loop );
881
882 Node* exact_limit( IdealLoopTree *loop );
883
884 // Return a post-walked LoopNode
  // Map CFG node 'n' to its enclosing IdealLoopTree. Only valid for CFG
  // nodes: for them _nodes[] stores an IdealLoopTree*, whereas data nodes
  // (has_ctrl) store a tagged control pointer instead -- hence the assert.
  IdealLoopTree *get_loop( Node *n ) const {
    // Dead nodes have no loop, so return the top level loop instead
    if (!has_node(n)) return _ltree_root;
    assert(!has_ctrl(n), "");
    return (IdealLoopTree*)_nodes[n->_idx];
  }
891
892 // Is 'n' a (nested) member of 'loop'?
  // Nested membership is delegated to IdealLoopTree::is_member, so a node
  // in an inner loop also counts as a member of every enclosing loop.
  int is_member( const IdealLoopTree *loop, Node *n ) const {
    return loop->is_member(get_loop(n)); }
895
896 // This is the basic building block of the loop optimizations. It clones an
897 // entire loop body. It makes an old_new loop body mapping; with this
898 // mapping you can find the new-loop equivalent to an old-loop node. All
899 // new-loop nodes are exactly equal to their old-loop counterparts, all
900 // edges are the same. All exits from the old-loop now have a RegionNode
901 // that merges the equivalent new-loop path. This is true even for the
902 // normal "loop-exit" condition. All uses of loop-invariant old-loop values
903 // now come from (one or more) Phis that merge their new-loop equivalents.
904 // Parameter side_by_side_idom:
905 // When side_by_side_idom is NULL, the dominator tree is constructed for
906 // the clone loop to dominate the original. Used in construction of
907 // pre-main-post loop sequence.
908 // When nonnull, the clone and original are side-by-side, both are
909 // dominated by the passed in side_by_side_idom node. Used in
910 // construction of unswitched loops.
911 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
912 Node* side_by_side_idom = NULL);
913
914 // If we got the effect of peeling, either by actually peeling or by
915 // making a pre-loop which must execute at least once, we can remove
916 // all loop-invariant dominated tests in the main body.
917 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
918
919 // Generate code to do a loop peel for the given loop (and body).
920 // old_new is a temp array.
921 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
922
923 // Add pre and post loops around the given loop. These loops are used
924 // during RCE, unrolling and aligning loops.
925 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
926
927 // Add post loop after the given loop.
928 Node *insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
929 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
930 Node *incr, Node *limit, CountedLoopNode *&post_head);
931
932 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
1003 return !has_node(n) || n->is_unreachable(_igvn);
1004 }
1005
1006 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1007 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1008
1009 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1010 void has_range_checks(IdealLoopTree *loop);
1011
1012 // Process post loops which have range checks and try to build a multi-version
1013 // guard to safely determine if we can execute the post loop which was RCE'd.
1014 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1015
1016 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1017 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1018
1019 // Create a slow version of the loop by cloning the loop
1020 // and inserting an if to select fast-slow versions.
1021 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1022 Node_List &old_new,
1023 int opcode);
1024
1025 // Clone a loop and return the clone head (clone_loop_head).
1026 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1027 // This routine was created for usage in CountedLoopReserveKit.
1028 //
1029 // int(1) -> If -> IfTrue -> original_loop_head
1030 // |
1031 // V
1032 // IfFalse -> clone_loop_head (returned by function pointer)
1033 //
1034 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1035 // Clone loop with an invariant test (that does not exit) and
1036 // insert a clone of the test that selects which version to
1037 // execute.
1038 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1039
1040 // Find candidate "if" for unswitching
1041 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1042
1043 // Range Check Elimination uses this function!
|
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP
26 #define SHARE_VM_OPTO_LOOPNODE_HPP
27
28 #include "opto/cfgnode.hpp"
29 #include "opto/multnode.hpp"
30 #include "opto/phaseX.hpp"
31 #include "opto/subnode.hpp"
32 #include "opto/type.hpp"
33
34 class CmpNode;
35 class CountedLoopEndNode;
36 class CountedLoopNode;
37 class IdealLoopTree;
38 class LoopNode;
39 class Node;
40 class Opaque5Node;
41 class PhaseIdealLoop;
42 class CountedLoopReserveKit;
43 class VectorSet;
44 class Invariance;
45 struct small_cache;
46
47 //
48 // I D E A L I Z E D L O O P S
49 //
50 // Idealized loops are the set of loops I perform more interesting
51 // transformations on, beyond simple hoisting.
52
53 //------------------------------LoopNode---------------------------------------
54 // Simple loop header. Fall in path on left, loop-back path on right.
55 class LoopNode : public RegionNode {
56 // Size is bigger to hold the flags. However, the flags do not change
57 // the semantics so it does not appear in the hash & cmp functions.
58 virtual uint size_of() const { return sizeof(*this); }
59 protected:
60 short _loop_flags;
61 // Names for flag bitfields
62 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
63 MainHasNoPreLoop=4,
64 HasExactTripCount=8,
65 InnerLoop=16,
66 PartialPeelLoop=32,
67 PartialPeelFailed=64,
68 HasReductions=128,
69 WasSlpAnalyzed=256,
70 PassedSlpAnalysis=512,
71 DoUnrollOnly=1024,
72 VectorizedLoop=2048,
73 HasAtomicPostLoop=4096,
74 HasRangeChecks=8192,
75 IsMultiversioned=16384,
76 StripMined=32768};
77 char _unswitch_count;
78 enum { _unswitch_max=3 };
79 char _postloop_flags;
80 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
81
82 public:
83 // Names for edge indices
84 enum { Self=0, EntryControl, LoopBackControl };
85
86 uint is_inner_loop() const { return _loop_flags & InnerLoop; }
87 void set_inner_loop() { _loop_flags |= InnerLoop; }
88
89 uint range_checks_present() const { return _loop_flags & HasRangeChecks; }
90 uint is_multiversioned() const { return _loop_flags & IsMultiversioned; }
91 uint is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
92 uint is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
93 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
94 uint partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
95 uint is_strip_mined() const { return _loop_flags & StripMined; }
96
97 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
98 void mark_has_reductions() { _loop_flags |= HasReductions; }
99 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
100 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
101 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
102 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
103 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
104 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
105 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
106 void mark_strip_mined() { _loop_flags |= StripMined; }
107 void clear_strip_mined() { _loop_flags &= ~StripMined; }
108
109 int unswitch_max() { return _unswitch_max; }
110 int unswitch_count() { return _unswitch_count; }
111
112 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
113 void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
114 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
115 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
116
117 void set_unswitch_count(int val) {
118 assert (val <= unswitch_max(), "too many unswitches");
119 _unswitch_count = val;
120 }
121
122 LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
123 init_class_id(Class_Loop);
124 init_req(EntryControl, entry);
125 init_req(LoopBackControl, backedge);
126 }
127
128 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
129 virtual int Opcode() const;
130 bool can_be_counted_loop(PhaseTransform* phase) const {
131 return req() == 3 && in(0) != NULL &&
132 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
133 in(2) != NULL && phase->type(in(2)) != Type::TOP;
134 }
135 bool is_valid_counted_loop() const;
136 #ifndef PRODUCT
137 virtual void dump_spec(outputStream *st) const;
138 #endif
139
140 void verify_strip_mined(int expect_opaq) const;
141 virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; }
142 virtual IfTrueNode* outer_loop_tail() const;
143 virtual IfNode* outer_loop_end() const;
144 virtual IfFalseNode* outer_loop_exit() const;
145 virtual SafePointNode* outer_safepoint() const;
146 virtual BoolNode* outer_bol() const;
147 virtual CmpINode* outer_cmp() const;
148 virtual Opaque5Node* outer_opaq() const;
149 };
150
151 //------------------------------Counted Loops----------------------------------
152 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
153 // path (and maybe some other exit paths). The trip-counter exit is always
154 // last in the loop. The trip-counter has to stride by a constant;
155 // the exit value is also loop invariant.
156
157 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
158 // CountedLoopNode has the incoming loop control and the loop-back-control
159 // which is always the IfTrue before the matching CountedLoopEndNode. The
160 // CountedLoopEndNode has an incoming control (possibly not the
161 // CountedLoopNode if there is control flow in the loop), the post-increment
162 // trip-counter value, and the limit. The trip-counter value is always of
163 // the form (Op old-trip-counter stride). The old-trip-counter is produced
164 // by a Phi connected to the CountedLoopNode. The stride is constant.
165 // The Op is any commutable opcode, including Add, Mul, Xor. The
166 // CountedLoopEndNode also takes in the loop-invariant limit value.
167
168 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
230 // Match increment with optional truncation
231 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
232
233 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
234 // can run short a few iterations and may start a few iterations in.
235 // It will be RCE'd and unrolled and aligned.
236
237 // A following 'post' loop will run any remaining iterations. Used
238 // during Range Check Elimination, the 'post' loop will do any final
239 // iterations with full checks. Also used by Loop Unrolling, where
240 // the 'post' loop will do any epilog iterations needed. Basically,
241 // a 'post' loop can not profitably be further unrolled or RCE'd.
242
243 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
244 // it may do under-flow checks for RCE and may do alignment iterations
245 // so the following main loop 'knows' that it is striding down cache
246 // lines.
247
248 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
249 // Aligned, may be missing its pre-loop.
250 uint is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
251 uint is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
252 uint is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
253 uint is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
254 uint is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
255 uint was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
256 uint has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
257 uint do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
258 uint is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
259 uint has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
260 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
261
262 int main_idx() const { return _main_idx; }
263
264
265 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
266 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
267 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
268 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
269
270 void set_trip_count(uint tc) { _trip_count = tc; }
271 uint trip_count() { return _trip_count; }
272
273 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
274 void set_exact_trip_count(uint tc) {
275 _trip_count = tc;
276 _loop_flags |= HasExactTripCount;
277 }
278 void set_nonexact_trip_count() {
279 _loop_flags &= ~HasExactTripCount;
280 }
281 void set_notpassed_slp() {
282 _loop_flags &= ~PassedSlpAnalysis;
283 }
284
285 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
286 float profile_trip_cnt() { return _profile_trip_cnt; }
287
288 void double_unrolled_count() { _unrolled_count_log2++; }
289 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
290
291 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
292 int node_count_before_unroll() { return _node_count_before_unroll; }
293 void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
294 int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
295
296 virtual LoopNode* skip_strip_mined(int expect_opaq = 1);
297 LoopNode* outer_loop() const;
298 virtual IfTrueNode* outer_loop_tail() const;
299 virtual IfNode* outer_loop_end() const;
300 virtual IfFalseNode* outer_loop_exit() const;
301 virtual SafePointNode* outer_safepoint() const;
302 virtual BoolNode* outer_bol() const;
303 virtual CmpINode* outer_cmp() const;
304 virtual Opaque5Node* outer_opaq() const;
305
306 #ifndef PRODUCT
307 virtual void dump_spec(outputStream *st) const;
308 #endif
309 };
310
311 //------------------------------CountedLoopEndNode-----------------------------
312 // CountedLoopEndNodes end simple trip counted loops. They act much like
313 // IfNodes.
314 class CountedLoopEndNode : public IfNode {
315 public:
316 enum { TestControl, TestValue };
317
318 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
319 : IfNode( control, test, prob, cnt) {
320 init_class_id(Class_CountedLoopEnd);
321 }
322 virtual int Opcode() const;
323
324 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
325 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
788 _nodes.map(old_node->_idx, (Node*)((intptr_t)new_node + 1));
789 }
  // Replace 'old_node' by 'new_node' in the graph via IGVN, and also record
  // the old->new mapping (see lazy_update above) so stale cached references
  // to 'old_node' (e.g. in the idom array) can be repaired lazily.
  void lazy_replace(Node *old_node, Node *new_node) {
    _igvn.replace_node(old_node, new_node);
    lazy_update(old_node, new_node);
  }
794
795 private:
796
797 // Place 'n' in some loop nest, where 'n' is a CFG node
798 void build_loop_tree();
799 int build_loop_tree_impl( Node *n, int pre_order );
800 // Insert loop into the existing loop tree. 'innermost' is a leaf of the
801 // loop tree, not the root.
802 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
803
804 // Place Data nodes in some loop nest
805 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
806 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
807 void build_loop_late_post ( Node* n );
808 void verify_strip_mined_scheduling(Node *n, Node* least);
809
810 // Array of immediate dominance info for each CFG node indexed by node idx
811 private:
812 uint _idom_size;
813 Node **_idom; // Array of immediate dominators
814 uint *_dom_depth; // Used for fast LCA test
815 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
816
  // Return the immediate dominator of CFG node 'd' without repairing the
  // _idom array in place. Dead entries (in(0) == NULL) are skipped by
  // following the lazy-update forwarding stored in _nodes: lazy_update tags
  // the replacement pointer by setting its low bit, hence the '& ~1' here.
  Node* idom_no_update(Node* d) const {
    assert(d->_idx < _idom_size, "oob");
    Node* n = _idom[d->_idx];
    assert(n != NULL,"Bad immediate dominator info.");
    while (n->in(0) == NULL) { // Skip dead CFG nodes
      //n = n->in(1);
      n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
      assert(n != NULL,"Bad immediate dominator info.");
    }
    return n;
  }
828 Node *idom(Node* d) const {
886 _dom_lca_tags(arena()), // Thread::resource_area
887 _verify_me(verify_me),
888 _verify_only(false) {
889 build_and_optimize(false, false);
890 }
891
892 // Build and verify the loop tree without modifying the graph. This
893 // is useful to verify that all inputs properly dominate their uses.
  static void verify(PhaseIterGVN& igvn) {
#ifdef ASSERT
    // Constructing this PhaseIdealLoop performs the verification as a
    // side effect; the object itself is unused. No-op in product builds.
    PhaseIdealLoop v(igvn);
#endif
  }
899
900 // True if the method has at least 1 irreducible loop
901 bool _has_irreducible_loops;
902
903 // Per-Node transform
904 virtual Node *transform( Node *a_node ) { return 0; }
905
906 bool is_counted_loop(Node* x, IdealLoopTree*& loop);
907 IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
908 IdealLoopTree* loop, float cl_prob, float le_fcnt,
909 Node*& entry_control, Node*& iffalse);
910
911 Node* exact_limit( IdealLoopTree *loop );
912
913 // Return a post-walked LoopNode
  // Map CFG node 'n' to its enclosing IdealLoopTree. Only valid for CFG
  // nodes: for them _nodes[] stores an IdealLoopTree*, whereas data nodes
  // (has_ctrl) store a tagged control pointer instead -- hence the assert.
  IdealLoopTree *get_loop( Node *n ) const {
    // Dead nodes have no loop, so return the top level loop instead
    if (!has_node(n)) return _ltree_root;
    assert(!has_ctrl(n), "");
    return (IdealLoopTree*)_nodes[n->_idx];
  }
920
921 // Is 'n' a (nested) member of 'loop'?
  // Nested membership is delegated to IdealLoopTree::is_member, so a node
  // in an inner loop also counts as a member of every enclosing loop.
  int is_member( const IdealLoopTree *loop, Node *n ) const {
    return loop->is_member(get_loop(n)); }
924
925 // This is the basic building block of the loop optimizations. It clones an
926 // entire loop body. It makes an old_new loop body mapping; with this
927 // mapping you can find the new-loop equivalent to an old-loop node. All
928 // new-loop nodes are exactly equal to their old-loop counterparts, all
929 // edges are the same. All exits from the old-loop now have a RegionNode
930 // that merges the equivalent new-loop path. This is true even for the
931 // normal "loop-exit" condition. All uses of loop-invariant old-loop values
932 // now come from (one or more) Phis that merge their new-loop equivalents.
933 // Parameter side_by_side_idom:
934 // When side_by_side_idom is NULL, the dominator tree is constructed for
935 // the clone loop to dominate the original. Used in construction of
936 // pre-main-post loop sequence.
937 // When nonnull, the clone and original are side-by-side, both are
938 // dominated by the passed in side_by_side_idom node. Used in
939 // construction of unswitched loops.
940 enum CloneLoopMode {
941 IgnoreStripMined = 0, // Only clone inner strip mined loop
942 CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops
943 ControlAroundStripMined = 2 // Only clone inner strip mined loop,
944 // result control flow branches
945 // either to inner clone or outer
946 // strip mined loop.
947 };
948 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
949 CloneLoopMode mode, Node* side_by_side_idom = NULL);
950 void clone_loop_handle_data_uses(Node* old, Node_List &old_new,
951 IdealLoopTree* loop, IdealLoopTree* companion_loop,
952 Node_List*& split_if_set, Node_List*& split_bool_set,
953 Node_List*& split_cex_set, Node_List& worklist,
954 uint new_counter, CloneLoopMode mode);
955 void clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
956 IdealLoopTree* outer_loop, int dd, Node_List &old_new,
957 Node_List& extra_data_nodes);
958
959 // If we got the effect of peeling, either by actually peeling or by
960 // making a pre-loop which must execute at least once, we can remove
961 // all loop-invariant dominated tests in the main body.
962 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
963
964 // Generate code to do a loop peel for the given loop (and body).
965 // old_new is a temp array.
966 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
967
968 // Add pre and post loops around the given loop. These loops are used
969 // during RCE, unrolling and aligning loops.
970 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
971
972 // Add post loop after the given loop.
973 Node *insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
974 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
975 Node *incr, Node *limit, CountedLoopNode *&post_head);
976
977 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
1048 return !has_node(n) || n->is_unreachable(_igvn);
1049 }
1050
1051 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1052 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1053
1054 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1055 void has_range_checks(IdealLoopTree *loop);
1056
1057 // Process post loops which have range checks and try to build a multi-version
1058 // guard to safely determine if we can execute the post loop which was RCE'd.
1059 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1060
1061 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1062 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1063
1064 // Create a slow version of the loop by cloning the loop
1065 // and inserting an if to select fast-slow versions.
1066 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1067 Node_List &old_new,
1068 int opcode,
1069 CloneLoopMode mode);
1070
1071 // Clone a loop and return the clone head (clone_loop_head).
1072 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1073 // This routine was created for usage in CountedLoopReserveKit.
1074 //
1075 // int(1) -> If -> IfTrue -> original_loop_head
1076 // |
1077 // V
1078 // IfFalse -> clone_loop_head (returned by function pointer)
1079 //
1080 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1081 // Clone loop with an invariant test (that does not exit) and
1082 // insert a clone of the test that selects which version to
1083 // execute.
1084 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1085
1086 // Find candidate "if" for unswitching
1087 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1088
1089 // Range Check Elimination uses this function!
|