52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so it does not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags; // Bitfield of loop properties; bit names in the enum below
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024,
71 VectorizedLoop=2048,
72 HasAtomicPostLoop=4096 };
73 char _unswitch_count; // Number of times this loop has been unswitched
74 enum { _unswitch_max=3 }; // Cap on unswitching, to bound code growth
75
76 public:
77 // Names for edge indices
78 enum { Self=0, EntryControl, LoopBackControl };
79
80 int is_inner_loop() const { return _loop_flags & InnerLoop; }
81 void set_inner_loop() { _loop_flags |= InnerLoop; }
82
83 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
84 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
85 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
86 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
87 void mark_has_reductions() { _loop_flags |= HasReductions; }
88 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
89 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
90 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
91 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
92 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
93
94 int unswitch_max() { return _unswitch_max; }
95 int unswitch_count() { return _unswitch_count; }
96 void set_unswitch_count(int val) {
97 assert (val <= unswitch_max(), "too many unswitches");
98 _unswitch_count = val;
99 }
100
101 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
102 init_class_id(Class_Loop);
103 init_req(EntryControl, entry);
104 init_req(LoopBackControl, backedge);
105 }
106
107 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
108 virtual int Opcode() const;
109 bool can_be_counted_loop(PhaseTransform* phase) const { // Candidate for CountedLoop conversion: all 3 inputs present and typed
110 return req() == 3 && in(0) != NULL &&
111 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
112 in(2) != NULL && phase->type(in(2)) != Type::TOP;
207 // during Range Check Elimination, the 'post' loop will do any final
208 // iterations with full checks. Also used by Loop Unrolling, where
209 // the 'post' loop will do any epilog iterations needed. Basically,
210 // a 'post' loop can not profitably be further unrolled or RCE'd.
211
212 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
213 // it may do under-flow checks for RCE and may do alignment iterations
214 // so the following main loop 'knows' that it is striding down cache
215 // lines.
216
217 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
218 // Aligned, may be missing its pre-loop.
219 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
220 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
221 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
222 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
223 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
224 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
225 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
226 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
227 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
228 int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
229 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
230 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
231
232 int main_idx() const { return _main_idx; } // Node index of the matching main loop (set by set_pre_loop/set_post_loop)
233
234
235 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
236 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
237 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
238 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
239
240 void set_trip_count(uint tc) { _trip_count = tc; }
241 uint trip_count() { return _trip_count; }
242
243 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
244 void set_exact_trip_count(uint tc) {
245 _trip_count = tc;
246 _loop_flags |= HasExactTripCount;
247 }
248 void set_nonexact_trip_count() {
249 _loop_flags &= ~HasExactTripCount;
250 }
525 // Return true if n is invariant
526 bool is_invariant(Node* n) const;
527
528 // Put loop body on igvn work list
529 void record_for_igvn();
530
531 bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } // Irreducible regions and dead tails do not count as loops
532 bool is_inner() { return is_loop() && _child == NULL; } // Leaf of the loop tree: no nested loops
533 bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
534
535 void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
536
537 #ifndef PRODUCT
538 void dump_head( ) const; // Dump loop head only
539 void dump() const; // Dump this loop recursively
540 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
541 #endif
542
543 };
544
545 // -----------------------------PhaseIdealLoop---------------------------------
546 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
547 // loop tree. Drives the loop-based transformations on the ideal graph.
548 class PhaseIdealLoop : public PhaseTransform {
549 friend class IdealLoopTree; // These collaborators access the phase internals directly
550 friend class SuperWord;
551 friend class CountedLoopReserveKit;
552
553 // Pre-computed def-use info
554 PhaseIterGVN &_igvn;
555
556 // Head of loop tree
557 IdealLoopTree *_ltree_root;
558
559 // Array of pre-order numbers, plus post-visited bit.
560 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
561 // ODD for post-visited. Other bits are the pre-order number.
562 uint *_preorders;
563 uint _max_preorder; // Capacity of _preorders (assumed; allocation is outside this view)
564
891 // the clone loop to dominate the original. Used in construction of
892 // pre-main-post loop sequence.
893 // When nonnull, the clone and original are side-by-side, both are
894 // dominated by the passed in side_by_side_idom node. Used in
895 // construction of unswitched loops.
896 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
897 Node* side_by_side_idom = NULL);
898
899 // If we got the effect of peeling, either by actually peeling or by
900 // making a pre-loop which must execute at least once, we can remove
901 // all loop-invariant dominated tests in the main body.
902 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
903
904 // Generate code to do a loop peel for the given loop (and body).
905 // old_new is a temp array.
906 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
907
908 // Add pre and post loops around the given loop. These loops are used
909 // during RCE, unrolling and aligning loops.
910 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
911 // Add a vector post loop between a vector main loop and the current post loop
912 void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
913 // If Node n lives in the back_ctrl block, we clone a private version of n
914 // in preheader_ctrl block and return that, otherwise return n.
915 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
916
917 // Take steps to maximally unroll the loop. Peel any odd iterations, then
918 // unroll to do double iterations. The next round of major loop transforms
919 // will repeat till the doubled loop body does all remaining iterations in 1
920 // pass.
921 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
922
923 // Unroll the loop body one step - make each trip do 2 iterations.
924 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
925
926 // Mark vector reduction candidates before loop unrolling
927 void mark_reductions( IdealLoopTree *loop );
928
929 // Return true if exp is a constant times an induction var
930 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
931
932 // Return true if exp is a scaled induction var plus (or minus) constant
963 Node* range, bool upper);
964
965 // Implementation of the loop predication to promote checks outside the loop
966 bool loop_predication_impl(IdealLoopTree *loop);
967
968 // Helper function to collect predicates, for eliminating the useless ones
969 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
970 void eliminate_useless_predicates();
971
972 // Change the control input of expensive nodes to allow commoning by
973 // IGVN when it is guaranteed to not result in a more frequent
974 // execution of the expensive node. Return true if progress.
975 bool process_expensive_nodes();
976
977 // Check whether node has become unreachable
978 bool is_node_unreachable(Node *n) const {
979 return !has_node(n) || n->is_unreachable(_igvn);
980 }
981
982 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
983 void do_range_check( IdealLoopTree *loop, Node_List &old_new );
984
985 // Create a slow version of the loop by cloning the loop
986 // and inserting an if to select fast-slow versions.
987 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
988 Node_List &old_new,
989 int opcode);
990
991 // Clone a loop and return the clone head (clone_loop_head).
992 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
993 // This routine was created for usage in CountedLoopReserveKit.
994 //
995 // int(1) -> If -> IfTrue -> original_loop_head
996 // |
997 // V
998 // IfFalse -> clone_loop_head (returned by function pointer)
999 //
1000 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1001 // Clone loop with an invariant test (that does not exit) and
1002 // insert a clone of the test that selects which version to
1003 // execute.
|
52 //------------------------------LoopNode---------------------------------------
53 // Simple loop header. Fall in path on left, loop-back path on right.
54 class LoopNode : public RegionNode {
55 // Size is bigger to hold the flags. However, the flags do not change
56 // the semantics so it does not appear in the hash & cmp functions.
57 virtual uint size_of() const { return sizeof(*this); }
58 protected:
59 short _loop_flags; // Bitfield of loop properties; bit names in the enum below
60 // Names for flag bitfields
61 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
62 MainHasNoPreLoop=4,
63 HasExactTripCount=8,
64 InnerLoop=16,
65 PartialPeelLoop=32,
66 PartialPeelFailed=64,
67 HasReductions=128,
68 WasSlpAnalyzed=256,
69 PassedSlpAnalysis=512,
70 DoUnrollOnly=1024,
71 VectorizedLoop=2048,
72 HasAtomicPostLoop=4096,
73 HasRangeChecks=8192,
74 IsMultiversioned=16384};
75 char _unswitch_count; // Number of times this loop has been unswitched
76 enum { _unswitch_max=3 }; // Cap on unswitching, to bound code growth
77
78 public:
79 // Names for edge indices
80 enum { Self=0, EntryControl, LoopBackControl };
81
82 int is_inner_loop() const { return _loop_flags & InnerLoop; }
83 void set_inner_loop() { _loop_flags |= InnerLoop; }
84
85 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
86 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
87 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
88 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
89 void mark_has_reductions() { _loop_flags |= HasReductions; }
90 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
91 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
92 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
93 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
94 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
95 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
96 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
97
98 int unswitch_max() { return _unswitch_max; }
99 int unswitch_count() { return _unswitch_count; }
100 void set_unswitch_count(int val) {
101 assert (val <= unswitch_max(), "too many unswitches");
102 _unswitch_count = val;
103 }
104
105 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
106 init_class_id(Class_Loop);
107 init_req(EntryControl, entry);
108 init_req(LoopBackControl, backedge);
109 }
110
111 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
112 virtual int Opcode() const;
113 bool can_be_counted_loop(PhaseTransform* phase) const { // Candidate for CountedLoop conversion: all 3 inputs present and typed
114 return req() == 3 && in(0) != NULL &&
115 in(1) != NULL && phase->type(in(1)) != Type::TOP &&
116 in(2) != NULL && phase->type(in(2)) != Type::TOP;
211 // during Range Check Elimination, the 'post' loop will do any final
212 // iterations with full checks. Also used by Loop Unrolling, where
213 // the 'post' loop will do any epilog iterations needed. Basically,
214 // a 'post' loop can not profitably be further unrolled or RCE'd.
215
216 // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
217 // it may do under-flow checks for RCE and may do alignment iterations
218 // so the following main loop 'knows' that it is striding down cache
219 // lines.
220
221 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
222 // Aligned, may be missing its pre-loop.
223 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
224 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
225 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
226 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
227 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
228 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
229 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
230 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
231 int is_main_no_pre_loop () const { return _loop_flags & MainHasNoPreLoop; }
232 int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
233 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
234 int loop_has_range_checks () const { return (_loop_flags & HasRangeChecks) == HasRangeChecks; }
235 int loop_is_multiversioned() const { return (_loop_flags & IsMultiversioned) == IsMultiversioned; }
236 void set_main_no_pre_loop () { _loop_flags |= MainHasNoPreLoop; }
237
238 int main_idx() const { return _main_idx; } // Node index of the matching main loop (set by set_pre_loop/set_post_loop)
239
240
241 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
242 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
243 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
244 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
245
246 void set_trip_count(uint tc) { _trip_count = tc; }
247 uint trip_count() { return _trip_count; }
248
249 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
250 void set_exact_trip_count(uint tc) {
251 _trip_count = tc;
252 _loop_flags |= HasExactTripCount;
253 }
254 void set_nonexact_trip_count() {
255 _loop_flags &= ~HasExactTripCount;
256 }
531 // Return true if n is invariant
532 bool is_invariant(Node* n) const;
533
534 // Put loop body on igvn work list
535 void record_for_igvn();
536
537 bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } // Irreducible regions and dead tails do not count as loops
538 bool is_inner() { return is_loop() && _child == NULL; } // Leaf of the loop tree: no nested loops
539 bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
540
541 void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
542
543 #ifndef PRODUCT
544 void dump_head( ) const; // Dump loop head only
545 void dump() const; // Dump this loop recursively
546 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
547 #endif
548
549 };
550
551 //------------------------------PostLoopInfo-----------------------------------
552 // Result bundle filled in when a post loop is inserted: the new exit control
553 // of the main loop and the head of the freshly created post loop.
554 class PostLoopInfo {
555 public:
556 Node *new_main_exit; // New fall-out control of the main loop
557 CountedLoopNode *post_head; // Head of the inserted post loop
558 PostLoopInfo() { init(); }
559 // Reset both fields to the "nothing produced yet" state.
560 void init() { post_head = NULL; new_main_exit = NULL; }
561 };
560
561 // -----------------------------PhaseIdealLoop---------------------------------
562 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
563 // loop tree. Drives the loop-based transformations on the ideal graph.
564 class PhaseIdealLoop : public PhaseTransform {
565 friend class IdealLoopTree; // These collaborators access the phase internals directly
566 friend class SuperWord;
567 friend class CountedLoopReserveKit;
568
569 // Pre-computed def-use info
570 PhaseIterGVN &_igvn;
571
572 // Head of loop tree
573 IdealLoopTree *_ltree_root;
574
575 // Array of pre-order numbers, plus post-visited bit.
576 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
577 // ODD for post-visited. Other bits are the pre-order number.
578 uint *_preorders;
579 uint _max_preorder; // Capacity of _preorders (assumed; allocation is outside this view)
580
907 // the clone loop to dominate the original. Used in construction of
908 // pre-main-post loop sequence.
909 // When nonnull, the clone and original are side-by-side, both are
910 // dominated by the passed in side_by_side_idom node. Used in
911 // construction of unswitched loops.
912 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
913 Node* side_by_side_idom = NULL);
914
915 // If we got the effect of peeling, either by actually peeling or by
916 // making a pre-loop which must execute at least once, we can remove
917 // all loop-invariant dominated tests in the main body.
918 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
919
920 // Generate code to do a loop peel for the given loop (and body).
921 // old_new is a temp array.
922 void do_peeling( IdealLoopTree *loop, Node_List &old_new );
923
924 // Add pre and post loops around the given loop. These loops are used
925 // during RCE, unrolling and aligning loops.
926 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
927
928 // Add post loop after the given loop.
929 void insert_post_loop( IdealLoopTree *loop, Node_List &old_new,
930 CountedLoopNode *main_head, CountedLoopEndNode *main_end,
931 Node *incr, Node *limit, PostLoopInfo &post_loop_info );
932
933 // Add an RCE'd post loop which we will multi-version adapt for run time test path usage
934 void insert_scalar_rced_post_loop( IdealLoopTree *loop, Node_List &old_new );
935
936 // Add a vector post loop between a vector main loop and the current post loop
937 void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
938
939 // If Node n lives in the back_ctrl block, we clone a private version of n
940 // in preheader_ctrl block and return that, otherwise return n.
941 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
942
943 // Take steps to maximally unroll the loop. Peel any odd iterations, then
944 // unroll to do double iterations. The next round of major loop transforms
945 // will repeat till the doubled loop body does all remaining iterations in 1
946 // pass.
947 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
948
949 // Unroll the loop body one step - make each trip do 2 iterations.
950 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
951
952 // Mark vector reduction candidates before loop unrolling
953 void mark_reductions( IdealLoopTree *loop );
954
955 // Return true if exp is a constant times an induction var
956 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
957
958 // Return true if exp is a scaled induction var plus (or minus) constant
989 Node* range, bool upper);
990
991 // Implementation of the loop predication to promote checks outside the loop
992 bool loop_predication_impl(IdealLoopTree *loop);
993
994 // Helper function to collect predicates, for eliminating the useless ones
995 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
996 void eliminate_useless_predicates();
997
998 // Change the control input of expensive nodes to allow commoning by
999 // IGVN when it is guaranteed to not result in a more frequent
1000 // execution of the expensive node. Return true if progress.
1001 bool process_expensive_nodes();
1002
1003 // Check whether node has become unreachable
1004 bool is_node_unreachable(Node *n) const {
1005 return !has_node(n) || n->is_unreachable(_igvn);
1006 }
1007
1008 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1009 void do_range_check(IdealLoopTree *loop, Node_List &range_check_list);
1010
1011 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1012 void has_range_checks(IdealLoopTree *loop);
1013
1014 // Process post loops which have range checks and try to build a multi-version
1015 // guard to safely determine if we can execute the post loop which was RCE'd.
1016 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1017
1018 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1019 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1020
1021 // Create a slow version of the loop by cloning the loop
1022 // and inserting an if to select fast-slow versions.
1023 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
1024 Node_List &old_new,
1025 int opcode);
1026
1027 // Clone a loop and return the clone head (clone_loop_head).
1028 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1029 // This routine was created for usage in CountedLoopReserveKit.
1030 //
1031 // int(1) -> If -> IfTrue -> original_loop_head
1032 // |
1033 // V
1034 // IfFalse -> clone_loop_head (returned by function pointer)
1035 //
1036 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1037 // Clone loop with an invariant test (that does not exit) and
1038 // insert a clone of the test that selects which version to
1039 // execute.
|