52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so it does not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags;
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024,
71 VectorizedLoop=2048,
72 HasAtomicPostLoop=4096 };
73 char _unswitch_count;
74 enum { _unswitch_max=3 };
75
76 public:
77 // Names for edge indices
78 enum { Self=0, EntryControl, LoopBackControl };
79
  // Query/set the InnerLoop flag bit.
  int is_inner_loop() const { return _loop_flags & InnerLoop; }
  void set_inner_loop() { _loop_flags |= InnerLoop; }

  // Partial-peel state: whether this loop came from partial peeling, and
  // whether a partial-peel attempt already failed (so it is not retried).
  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  // One-way "mark" setters for the remaining flag bits; no clear
  // operations are provided for these.
  void mark_has_reductions() { _loop_flags |= HasReductions; }
  void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
  void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }

  // Unswitching budget: a loop may be unswitched at most _unswitch_max
  // (= 3) times; the count is kept in a char, which the assert protects.
  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }
  void set_unswitch_count(int val) {
    assert (val <= unswitch_max(), "too many unswitches");
    _unswitch_count = val;
  }
100
  // Build a loop-header region with three control inputs, per the
  // edge-index enum above: req(EntryControl) is the fall-in path and
  // req(LoopBackControl) is the back edge (Self=0 per RegionNode usage).
  // All loop flags and the unswitch counter start at zero.
  LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
    init_class_id(Class_Loop);
    init_req(EntryControl, entry);
    init_req(LoopBackControl, backedge);
  }
106
107 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
108 virtual int Opcode() const;
109 bool can_be_counted_loop(PhaseTransform* phase) const {
110 return req() == 3 && in(0) != NULL &&
111 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
112 in(2) != NULL && phase->type(in(2)) != Type::TOP;
208 // iterations with full checks. Also used by Loop Unrolling, where
209 // the 'post' loop will do any epilog iterations needed. Basically,
210 // a 'post' loop can not profitably be further unrolled or RCE'd.
211
212 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
213 // it may do under-flow checks for RCE and may do alignment iterations
214 // so the following main loop 'knows' that it is striding down cache
215 // lines.
216
217 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
// Aligned, may be missing its pre-loop.
  // The pre/main/post classification lives in the two low bits of
  // _loop_flags (PreMainPostFlagsMask); exactly one of Normal, Pre,
  // Main or Post is encoded there at a time.
  int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
  int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
  int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
  int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
  // Individual flag-bit queries.
  int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
  int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
  int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
  int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
  int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
  int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
  int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
  void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }

  // Node index of the associated main loop; written by set_pre_loop /
  // set_post_loop below.
  int main_idx() const { return _main_idx; }


  // Convert a Normal loop into a pre/main/post loop. Pre and post loops
  // remember the node index of their main loop in _main_idx.
  void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
  void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
  void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
  void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }

  void set_trip_count(uint tc) { _trip_count = tc; }
  uint trip_count() { return _trip_count; }

  // _trip_count is only an exact iteration count while HasExactTripCount
  // is set; set_exact_trip_count records both together.
  bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
  void set_exact_trip_count(uint tc) {
    _trip_count = tc;
    _loop_flags |= HasExactTripCount;
  }
248 void set_nonexact_trip_count() {
525 // Return true if n is invariant
526 bool is_invariant(Node* n) const;
527
528 // Put loop body on igvn work list
529 void record_for_igvn();
530
  // A real loop: not marked irreducible, and the tail control is present
  // and live (not top).
  bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); }
  // Innermost loop: a loop with no child in the loop tree.
  bool is_inner() { return is_loop() && _child == NULL; }
  // True when the loop head node is a CountedLoop.
  bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
534
535 void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
536
537 #ifndef PRODUCT
538 void dump_head( ) const; // Dump loop head only
539 void dump() const; // Dump this loop recursively
540 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
541 #endif
542
543 };
544
545 // -----------------------------PhaseIdealLoop---------------------------------
546 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
547 // loop tree. Drives the loop-based transformations on the ideal graph.
548 class PhaseIdealLoop : public PhaseTransform {
549 friend class IdealLoopTree;
550 friend class SuperWord;
551 friend class CountedLoopReserveKit;
552
553 // Pre-computed def-use info
554 PhaseIterGVN &_igvn;
555
556 // Head of loop tree
557 IdealLoopTree *_ltree_root;
558
559 // Array of pre-order numbers, plus post-visited bit.
560 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
561 // ODD for post-visited. Other bits are the pre-order number.
562 uint *_preorders;
563 uint _max_preorder;
564
891 // the clone loop to dominate the original. Used in construction of
892 // pre-main-post loop sequence.
893 // When nonnull, the clone and original are side-by-side, both are
894 // dominated by the passed in side_by_side_idom node. Used in
895 // construction of unswitched loops.
896 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
897 Node* side_by_side_idom = NULL);
898
899 // If we got the effect of peeling, either by actually peeling or by
900 // making a pre-loop which must execute at least once, we can remove
901 // all loop-invariant dominated tests in the main body.
902 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
903
904 // Generate code to do a loop peel for the given loop (and body).
905 // old_new is a temp array.
906 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
907
908 // Add pre and post loops around the given loop. These loops are used
909 // during RCE, unrolling and aligning loops.
910 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
911 // Add a vector post loop between a vector main loop and the current post loop
912 void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
913 // If Node n lives in the back_ctrl block, we clone a private version of n
914 // in preheader_ctrl block and return that, otherwise return n.
915 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
916
917 // Take steps to maximally unroll the loop. Peel any odd iterations, then
918 // unroll to do double iterations. The next round of major loop transforms
919 // will repeat till the doubled loop body does all remaining iterations in 1
920 // pass.
921 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
922
923 // Unroll the loop body one step - make each trip do 2 iterations.
924 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
925
926 // Mark vector reduction candidates before loop unrolling
927 void mark_reductions( IdealLoopTree *loop );
928
929 // Return true if exp is a constant times an induction var
930 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
964
965 // Implementation of the loop predication to promote checks outside the loop
966 bool loop_predication_impl(IdealLoopTree *loop);
967
968 // Helper function to collect predicate for eliminating the useless ones
969 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
970 void eliminate_useless_predicates();
971
972 // Change the control input of expensive nodes to allow commoning by
973 // IGVN when it is guaranteed to not result in a more frequent
974 // execution of the expensive node. Return true if progress.
975 bool process_expensive_nodes();
976
977 // Check whether node has become unreachable
978 bool is_node_unreachable(Node *n) const {
979 return !has_node(n) || n->is_unreachable(_igvn);
980 }
981
982 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
983 void do_range_check( IdealLoopTree *loop, Node_List &old_new );
984
985 // Create a slow version of the loop by cloning the loop
986 // and inserting an if to select fast-slow versions.
987 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
988 Node_List &old_new,
989 int opcode);
990
991 // Clone a loop and return the clone head (clone_loop_head).
992 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
993 // This routine was created for usage in CountedLoopReserveKit.
994 //
995 // int(1) -> If -> IfTrue -> original_loop_head
996 // |
997 // V
//     IfFalse -> clone_loop_head (returned by this function)
999 //
1000 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1001 // Clone loop with an invariant test (that does not exit) and
1002 // insert a clone of the test that selects which version to
1003 // execute.
|
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so it does not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags;
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024,
71 VectorizedLoop=2048,
72 HasAtomicPostLoop=4096,
73 HasRangeChecks=8192,
74 IsMultiversioned=16384};
75 char _unswitch_count;
76 enum { _unswitch_max=3 };
77
78 public:
79 // Names for edge indices
80 enum { Self=0, EntryControl, LoopBackControl };
81
  // Query/set the InnerLoop flag bit.
  int is_inner_loop() const { return _loop_flags & InnerLoop; }
  void set_inner_loop() { _loop_flags |= InnerLoop; }

  // Flag-bit queries, including the multiversioning-related bits
  // (HasRangeChecks, IsMultiversioned) added alongside RCE'd post loops.
  int has_range_checks() const { return _loop_flags & HasRangeChecks; }
  int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }

  // One-way "mark" setters for the flag bits; no clear operations are
  // provided for these.
  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_has_reductions() { _loop_flags |= HasReductions; }
  void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
  void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
  void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
  void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }

  // Unswitching budget: a loop may be unswitched at most _unswitch_max
  // (= 3) times; the count is kept in a char, which the assert protects.
  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }
  void set_unswitch_count(int val) {
    assert (val <= unswitch_max(), "too many unswitches");
    _unswitch_count = val;
  }
108
  // Build a loop-header region with three control inputs, per the
  // edge-index enum above: req(EntryControl) is the fall-in path and
  // req(LoopBackControl) is the back edge (Self=0 per RegionNode usage).
  // All loop flags and the unswitch counter start at zero.
  LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
    init_class_id(Class_Loop);
    init_req(EntryControl, entry);
    init_req(LoopBackControl, backedge);
  }
114
115 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
116 virtual int Opcode() const;
117 bool can_be_counted_loop(PhaseTransform* phase) const {
118 return req() == 3 && in(0) != NULL &&
119 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
120 in(2) != NULL && phase->type(in(2)) != Type::TOP;
216 // iterations with full checks. Also used by Loop Unrolling, where
217 // the 'post' loop will do any epilog iterations needed. Basically,
218 // a 'post' loop can not profitably be further unrolled or RCE'd.
219
220 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
221 // it may do under-flow checks for RCE and may do alignment iterations
222 // so the following main loop 'knows' that it is striding down cache
223 // lines.
224
225 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
// Aligned, may be missing its pre-loop.
  // The pre/main/post classification lives in the two low bits of
  // _loop_flags (PreMainPostFlagsMask); exactly one of Normal, Pre,
  // Main or Post is encoded there at a time.
  int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
  int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
  int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
  int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
  // Individual flag-bit queries.
  int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
  int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
  int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
  int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
  int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
  int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
  void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }

  // Node index of the associated main loop; written by set_pre_loop /
  // set_post_loop below.
  int main_idx() const { return _main_idx; }


  // Convert a Normal loop into a pre/main/post loop. Pre and post loops
  // remember the node index of their main loop in _main_idx.
  void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
  void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
  void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
  void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }

  void set_trip_count(uint tc) { _trip_count = tc; }
  uint trip_count() { return _trip_count; }

  // _trip_count is only an exact iteration count while HasExactTripCount
  // is set; set_exact_trip_count records both together.
  bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
  void set_exact_trip_count(uint tc) {
    _trip_count = tc;
    _loop_flags |= HasExactTripCount;
  }
255 void set_nonexact_trip_count() {
532 // Return true if n is invariant
533 bool is_invariant(Node* n) const;
534
535 // Put loop body on igvn work list
536 void record_for_igvn();
537
  // A real loop: not marked irreducible, and the tail control is present
  // and live (not top).
  bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); }
  // Innermost loop: a loop with no child in the loop tree.
  bool is_inner() { return is_loop() && _child == NULL; }
  // True when the loop head node is a CountedLoop.
  bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
541
542 void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
543
544 #ifndef PRODUCT
545 void dump_head( ) const; // Dump loop head only
546 void dump() const; // Dump this loop recursively
547 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
548 #endif
549
550 };
551
552 class PostLoopInfo {
553 public:
554 Node *new_main_exit;
555 CountedLoopNode *post_head;
556
557 PostLoopInfo() { init(); }
558
559 void init() { new_main_exit = NULL; post_head = NULL; }
560 };
561
562 // -----------------------------PhaseIdealLoop---------------------------------
563 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
564 // loop tree. Drives the loop-based transformations on the ideal graph.
565 class PhaseIdealLoop : public PhaseTransform {
566 friend class IdealLoopTree;
567 friend class SuperWord;
568 friend class CountedLoopReserveKit;
569
570 // Pre-computed def-use info
571 PhaseIterGVN &_igvn;
572
573 // Head of loop tree
574 IdealLoopTree *_ltree_root;
575
576 // Array of pre-order numbers, plus post-visited bit.
577 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
578 // ODD for post-visited. Other bits are the pre-order number.
579 uint *_preorders;
580 uint _max_preorder;
581
908 // the clone loop to dominate the original. Used in construction of
909 // pre-main-post loop sequence.
910 // When nonnull, the clone and original are side-by-side, both are
911 // dominated by the passed in side_by_side_idom node. Used in
912 // construction of unswitched loops.
913 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
914 Node* side_by_side_idom = NULL);
915
916 // If we got the effect of peeling, either by actually peeling or by
917 // making a pre-loop which must execute at least once, we can remove
918 // all loop-invariant dominated tests in the main body.
919 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
920
921 // Generate code to do a loop peel for the given loop (and body).
922 // old_new is a temp array.
923 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
924
925 // Add pre and post loops around the given loop. These loops are used
926 // during RCE, unrolling and aligning loops.
927 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
928
929 // Add post loop after the given loop.
930 void insert_post_loop( IdealLoopTree *loop, Node_List &old_new,
931 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
932 Node *incr, Node *limit, PostLoopInfo &post_loop_info );
933
934 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
935 void insert_scalar_rced_post_loop( IdealLoopTree *loop, Node_List &old_new );
936
937 // Add a vector post loop between a vector main loop and the current post loop
938 void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
939 // If Node n lives in the back_ctrl block, we clone a private version of n
940 // in preheader_ctrl block and return that, otherwise return n.
941 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
942
943 // Take steps to maximally unroll the loop. Peel any odd iterations, then
944 // unroll to do double iterations. The next round of major loop transforms
945 // will repeat till the doubled loop body does all remaining iterations in 1
946 // pass.
947 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
948
949 // Unroll the loop body one step - make each trip do 2 iterations.
950 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
951
952 // Mark vector reduction candidates before loop unrolling
953 void mark_reductions( IdealLoopTree *loop );
954
955 // Return true if exp is a constant times an induction var
956 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
990
991 // Implementation of the loop predication to promote checks outside the loop
992 bool loop_predication_impl(IdealLoopTree *loop);
993
994 // Helper function to collect predicate for eliminating the useless ones
995 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
996 void eliminate_useless_predicates();
997
998 // Change the control input of expensive nodes to allow commoning by
999 // IGVN when it is guaranteed to not result in a more frequent
1000 // execution of the expensive node. Return true if progress.
1001 bool process_expensive_nodes();
1002
1003 // Check whether node has become unreachable
1004 bool is_node_unreachable(Node *n) const {
1005 return !has_node(n) || n->is_unreachable(_igvn);
1006 }
1007
1008 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1009 void do_range_check( IdealLoopTree *loop, Node_List &old_new );
1010
1011 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1012 void has_range_checks(IdealLoopTree *loop);
1013
1014 // Process post loops which have range checks and try to build a multi-version
1015 // guard to safely determine if we can execute the post loop which was RCE'd.
1016 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1017
// Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1019 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1020
1021 // Create a slow version of the loop by cloning the loop
1022 // and inserting an if to select fast-slow versions.
1023 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1024 Node_List &old_new,
1025 int opcode);
1026
1027 // Clone a loop and return the clone head (clone_loop_head).
1028 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1029 // This routine was created for usage in CountedLoopReserveKit.
1030 //
1031 // int(1) -> If -> IfTrue -> original_loop_head
1032 // |
1033 // V
//     IfFalse -> clone_loop_head (returned by this function)
1035 //
1036 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1037 // Clone loop with an invariant test (that does not exit) and
1038 // insert a clone of the test that selects which version to
1039 // execute.
|