50 // transformations on, beyond simple hoisting.
51
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so they do not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags;
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024 };
71 char _unswitch_count;
72 enum { _unswitch_max=3 };
73
74 public:
75 // Names for edge indices
76 enum { Self=0, EntryControl, LoopBackControl };
77
// Loop-shape flag accessors. Each query returns the raw masked bit of
// _loop_flags (nonzero iff the flag is set); the set_*/mark_* helpers OR
// the bit in. No method in this group ever clears a flag, so marking is
// one-way.
78 int is_inner_loop() const { return _loop_flags & InnerLoop; }
79 void set_inner_loop() { _loop_flags |= InnerLoop; }
80
81 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
82 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
83 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
84 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
85 void mark_has_reductions() { _loop_flags |= HasReductions; }
86 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
87 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
88 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
89
// Unswitch bookkeeping: _unswitch_count records how many times this loop
// has been unswitched, capped at _unswitch_max (3) to bound code growth.
// The two-argument assert (HotSpot-style, presumably debug-only) enforces
// the cap; values <= 3 fit safely in the char-typed _unswitch_count.
90 int unswitch_max() { return _unswitch_max; }
91 int unswitch_count() { return _unswitch_count; }
92 void set_unswitch_count(int val) {
93 assert (val <= unswitch_max(), "too many unswitches");
94 _unswitch_count = val;
95 }
96
// Construct a loop-header region with two control inputs: per the edge
// enum above, req(EntryControl=1) is the fall-in entry and
// req(LoopBackControl=2) is the backedge. Flags and unswitch count start
// cleared (Normal, not yet unswitched).
97 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
98 init_class_id(Class_Loop);
99 init_req(EntryControl, entry);
100 init_req(LoopBackControl, backedge);
101 }
102
103 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
104 virtual int Opcode() const;
105 bool can_be_counted_loop(PhaseTransform* phase) const {
106 return req() == 3 && in(0) != NULL &&
107 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
108 in(2) != NULL && phase->type(in(2)) != Type::TOP;
204 // iterations with full checks. Also used by Loop Unrolling, where
205 // the 'post' loop will do any epilog iterations needed. Basically,
206 // a 'post' loop cannot profitably be further unrolled or RCE'd.
207
208 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
209 // it may do under-flow checks for RCE and may do alignment iterations
210 // so the following main loop 'knows' that it is striding down cache
211 // lines.
212
213 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
214 // Aligned, may be missing its pre-loop.
// Pre/main/post classification queries. The low two bits of _loop_flags
// (PreMainPostFlagsMask) encode exactly one of Normal/Pre/Main/Post; the
// remaining queries test single flag bits set by the mark_* methods.
// NOTE(review): the enclosing class header is outside this chunk —
// presumably CountedLoopNode; confirm against the full file.
215 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
216 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
217 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
218 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
219 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
220 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
221 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
222 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
223 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
224 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
225
// _main_idx is the node index (_idx) of the associated main loop; it is
// recorded when this node becomes that main loop's pre- or post-loop.
226 int main_idx() const { return _main_idx; }
227
228
// Transition from Normal to Pre/Main/Post. The asserts require the loop
// to still be Normal, so each node is classified at most once;
// set_normal_loop() clears only the classification bits, leaving the
// other flags intact.
229 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
230 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
231 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
232 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
233
// Trip-count handling: _trip_count holds the loop's trip count and
// HasExactTripCount marks whether that count is known exactly.
// set_trip_count() updates the count without touching the exactness flag;
// set_exact_trip_count() sets both together.
234 void set_trip_count(uint tc) { _trip_count = tc; }
235 uint trip_count() { return _trip_count; }
236
237 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
238 void set_exact_trip_count(uint tc) {
239 _trip_count = tc;
240 _loop_flags |= HasExactTripCount;
241 }
242 void set_nonexact_trip_count() {
243 _loop_flags &= ~HasExactTripCount;
876 // the clone loop to dominate the original. Used in construction of
877 // pre-main-post loop sequence.
878 // When nonnull, the clone and original are side-by-side, both are
879 // dominated by the passed in side_by_side_idom node. Used in
880 // construction of unswitched loops.
881 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
882 Node* side_by_side_idom = NULL);
883
884 // If we got the effect of peeling, either by actually peeling or by
885 // making a pre-loop which must execute at least once, we can remove
886 // all loop-invariant dominated tests in the main body.
887 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
888
889 // Generate code to do a loop peel for the given loop (and body).
890 // old_new is a temp array.
891 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
892
893 // Add pre and post loops around the given loop. These loops are used
894 // during RCE, unrolling and aligning loops.
895 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
896 // If Node n lives in the back_ctrl block, we clone a private version of n
897 // in preheader_ctrl block and return that, otherwise return n.
898 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
899
900 // Take steps to maximally unroll the loop. Peel any odd iterations, then
901 // unroll to do double iterations. The next round of major loop transforms
902 // will repeat till the doubled loop body does all remaining iterations in 1
903 // pass.
904 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
905
906 // Unroll the loop body one step - make each trip do 2 iterations.
907 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
908
909 // Mark vector reduction candidates before loop unrolling
910 void mark_reductions( IdealLoopTree *loop );
911
912 // Return true if exp is a constant times an induction var
913 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
914
915 // Return true if exp is a scaled induction var plus (or minus) constant
|
50 // transformations on, beyond simple hoisting.
51
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so they do not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags;
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024,
71 VectorizedLoop=2048,
72 HasAtomicPostLoop=4096 };
73 char _unswitch_count;
74 enum { _unswitch_max=3 };
75
76 public:
77 // Names for edge indices
78 enum { Self=0, EntryControl, LoopBackControl };
79
// Loop-shape flag accessors. Each query returns the raw masked bit of
// _loop_flags (nonzero iff the flag is set); the set_*/mark_* helpers OR
// the bit in, and nothing in this group ever clears a flag.
// mark_loop_vectorized()/mark_has_atomic_post_loop() record the new
// VectorizedLoop/HasAtomicPostLoop states — per the flag names these are
// set when the loop has been vectorized / given an atomic post loop;
// confirm semantics against the setters' callers.
80 int is_inner_loop() const { return _loop_flags & InnerLoop; }
81 void set_inner_loop() { _loop_flags |= InnerLoop; }
82
83 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
84 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
85 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
86 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
87 void mark_has_reductions() { _loop_flags |= HasReductions; }
88 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
89 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
90 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
91 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
92 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
93
// Unswitch bookkeeping: _unswitch_count records how many times this loop
// has been unswitched, capped at _unswitch_max (3) to bound code growth.
// The two-argument assert (HotSpot-style, presumably debug-only) enforces
// the cap; values <= 3 fit safely in the char-typed _unswitch_count.
94 int unswitch_max() { return _unswitch_max; }
95 int unswitch_count() { return _unswitch_count; }
96 void set_unswitch_count(int val) {
97 assert (val <= unswitch_max(), "too many unswitches");
98 _unswitch_count = val;
99 }
100
// Construct a loop-header region with two control inputs: per the edge
// enum above, req(EntryControl=1) is the fall-in entry and
// req(LoopBackControl=2) is the backedge. Flags and unswitch count start
// cleared (Normal, not yet unswitched).
101 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
102 init_class_id(Class_Loop);
103 init_req(EntryControl, entry);
104 init_req(LoopBackControl, backedge);
105 }
106
107 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
108 virtual int Opcode() const;
109 bool can_be_counted_loop(PhaseTransform* phase) const {
110 return req() == 3 && in(0) != NULL &&
111 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
112 in(2) != NULL && phase->type(in(2)) != Type::TOP;
208 // iterations with full checks. Also used by Loop Unrolling, where
209 // the 'post' loop will do any epilog iterations needed. Basically,
210 // a 'post' loop cannot profitably be further unrolled or RCE'd.
211
212 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
213 // it may do under-flow checks for RCE and may do alignment iterations
214 // so the following main loop 'knows' that it is striding down cache
215 // lines.
216
217 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
218 // Aligned, may be missing its pre-loop.
// Pre/main/post classification queries. The low two bits of _loop_flags
// (PreMainPostFlagsMask) encode exactly one of Normal/Pre/Main/Post; the
// remaining queries test single flag bits set by the mark_* methods,
// including the newly added VectorizedLoop and HasAtomicPostLoop bits.
// NOTE(review): the enclosing class header is outside this chunk —
// presumably CountedLoopNode; confirm against the full file.
219 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
220 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
221 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
222 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
223 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
224 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
225 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
226 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
227 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
228 int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
229 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
230 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
231
// _main_idx is the node index (_idx) of the associated main loop; it is
// recorded when this node becomes that main loop's pre- or post-loop.
232 int main_idx() const { return _main_idx; }
233
234
// Transition from Normal to Pre/Main/Post. The asserts require the loop
// to still be Normal, so each node is classified at most once;
// set_normal_loop() clears only the classification bits, leaving the
// other flags intact.
235 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
236 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
237 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
238 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
239
// Trip-count handling: _trip_count holds the loop's trip count and
// HasExactTripCount marks whether that count is known exactly.
// set_trip_count() updates the count without touching the exactness flag;
// set_exact_trip_count() sets both together.
240 void set_trip_count(uint tc) { _trip_count = tc; }
241 uint trip_count() { return _trip_count; }
242
243 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
244 void set_exact_trip_count(uint tc) {
245 _trip_count = tc;
246 _loop_flags |= HasExactTripCount;
247 }
248 void set_nonexact_trip_count() {
249 _loop_flags &= ~HasExactTripCount;
882 // the clone loop to dominate the original. Used in construction of
883 // pre-main-post loop sequence.
884 // When nonnull, the clone and original are side-by-side, both are
885 // dominated by the passed in side_by_side_idom node. Used in
886 // construction of unswitched loops.
887 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
888 Node* side_by_side_idom = NULL);
889
890 // If we got the effect of peeling, either by actually peeling or by
891 // making a pre-loop which must execute at least once, we can remove
892 // all loop-invariant dominated tests in the main body.
893 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
894
895 // Generate code to do a loop peel for the given loop (and body).
896 // old_new is a temp array.
897 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
898
899 // Add pre and post loops around the given loop. These loops are used
900 // during RCE, unrolling and aligning loops.
901 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
902 // Add a vector post loop between a vector main loop and the current post loop
903 void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
904 // If Node n lives in the back_ctrl block, we clone a private version of n
905 // in preheader_ctrl block and return that, otherwise return n.
906 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
907
908 // Take steps to maximally unroll the loop. Peel any odd iterations, then
909 // unroll to do double iterations. The next round of major loop transforms
910 // will repeat till the doubled loop body does all remaining iterations in 1
911 // pass.
912 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
913
914 // Unroll the loop body one step - make each trip do 2 iterations.
915 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
916
917 // Mark vector reduction candidates before loop unrolling
918 void mark_reductions( IdealLoopTree *loop );
919
920 // Return true if exp is a constant times an induction var
921 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
922
923 // Return true if exp is a scaled induction var plus (or minus) constant
|