--- old/src/cpu/aarch64/vm/c2_init_aarch64.cpp 2015-05-01 18:06:41.349650500 -0700
+++ new/src/cpu/aarch64/vm/c2_init_aarch64.cpp 2015-05-01 18:06:41.164650500 -0700
@@ -33,4 +33,6 @@
   guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
   // QQQ presumably all 64bit cpu's support this.  Seems like the ifdef could
   // simply be left out.
+
+  SuperWordLoopUnrollAnalysis = false;
 }
--- old/src/cpu/ppc/vm/c2_init_ppc.cpp 2015-05-01 18:06:45.115650500 -0700
+++ new/src/cpu/ppc/vm/c2_init_ppc.cpp 2015-05-01 18:06:44.931650500 -0700
@@ -45,4 +45,6 @@
       FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
     }
   }
+
+  SuperWordLoopUnrollAnalysis = false;
 }
--- old/src/cpu/sparc/vm/c2_init_sparc.cpp 2015-05-01 18:06:48.818650500 -0700
+++ new/src/cpu/sparc/vm/c2_init_sparc.cpp 2015-05-01 18:06:48.635650500 -0700
@@ -30,4 +30,6 @@
 
 void Compile::pd_compiler2_init() {
   guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
+
+  SuperWordLoopUnrollAnalysis = false;
 }
--- old/src/share/vm/opto/c2_globals.hpp 2015-05-01 18:06:52.614650500 -0700
+++ new/src/share/vm/opto/c2_globals.hpp 2015-05-01 18:06:52.416650500 -0700
@@ -191,6 +191,12 @@
   product(intx, LoopMaxUnroll, 16,                                          \
           "Maximum number of unrolls for main loop")                        \
                                                                             \
+  product(bool, SuperWordLoopUnrollAnalysis, true,                          \
+          "Map number of unrolls for main loop via slp analysis")           \
+                                                                            \
+  notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false,                 \
+          "Trace what slp analysis applies")                                \
+                                                                            \
   product(intx, LoopUnrollMin, 4,                                           \
           "Minimum number of unroll loop bodies before checking progress"   \
           "of rounds of unroll,optimize,..")                                \
--- old/src/share/vm/opto/loopTransform.cpp 2015-05-01 18:06:56.436650500 -0700
+++ new/src/share/vm/opto/loopTransform.cpp 2015-05-01 18:06:56.250650500 -0700
@@ -38,6 +38,7 @@
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
+#include "opto/superword.hpp"
 #include "opto/vectornode.hpp"
 
 //------------------------------is_loop_exit-----------------------------------
@@ -640,7 +641,7 @@
 //------------------------------policy_unroll----------------------------------
 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 // the loop is a CountedLoop and the body is small enough.
-bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
+bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
@@ -652,9 +653,46 @@
   // After split at least one iteration will be executed in pre-loop.
   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 
+  _local_loop_unroll_limit = LoopUnrollLimit;
+  _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
   if (future_unroll_ct > LoopMaxUnroll) return false;
 
+  if (UseSuperWord) {
+    if (cl->is_reduction_loop() == false) phase->mark_reductions(this);
+
+    // Only attempt slp analysis when user controls do not prohibit it
+    if (LoopMaxUnroll > _local_loop_unroll_factor) {
+      // Once policy_slp_analysis succeeds, mark the loop with the
+      // maximal unroll factor so that we minimize analysis passes
+      if (cl->has_passed_slp() == false) {
+        if (policy_slp_analysis(cl, phase)) {
+          if (_local_loop_unroll_factor > 4) {
+            cl->mark_passed_slp();
+            cl->set_slp_max_unroll(_local_loop_unroll_factor);
+          }
+        }
+      }
+
+      if (cl->has_passed_slp()) {
+        int slp_max_unroll_factor = cl->slp_max_unroll();
+        if ((slp_max_unroll_factor > 4) &&
+            (slp_max_unroll_factor >= future_unroll_ct)) {
+          int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
+          if (new_limit > LoopUnrollLimit) {
+#ifndef PRODUCT
+            if (TraceSuperWordLoopUnrollAnalysis) {
+              tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d\n",
+                            new_limit, _local_loop_unroll_limit);
+            }
+#endif
+            _local_loop_unroll_limit = new_limit;
+          }
+        }
+      }
+    }
+  }
+
   // Check for initial stride being a small enough constant
   if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
 
@@ -748,7 +786,7 @@
   }
 
   // Check for being too big
-  if (body_size > (uint)LoopUnrollLimit) {
+  if (body_size > (uint)_local_loop_unroll_limit) {
     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
     // Normal case: loop too big
     return false;
@@ -758,6 +796,186 @@
   return true;
 }
 
+//------------------------------policy_slp_analysis----------------------------
+// Superword (SLP) pre-unroll analysis.  Marks loop-overhead and address
+// expression nodes as ignored, then lowers the T_INT vector size to the
+// smallest Matcher::max_vector_size() among the remaining primitive-typed
+// nodes accepted by VectorNode::implemented() and stores the result in
+// _local_loop_unroll_factor.  Returns FALSE when the body is rejected, in
+// which case the loop is marked as analyzed so it is not tried again.
+bool IdealLoopTree::policy_slp_analysis( CountedLoopNode *cl, PhaseIdealLoop *phase ) {
+  // SLP analysis
+  bool not_slp = false;
+
+  // Enable this functionality target by target as needed
+  if (SuperWordLoopUnrollAnalysis) {
+    SuperWord sw(phase);
+    sw.transform_loop(this, false);
+
+    // If the loop is slp canonical analyze it
+    if (sw.early_return() == false) {
+      Arena *a = Thread::current()->resource_area();
+      int max_vector = Matcher::max_vector_size(T_INT);
+      // NOTE(review): sizeof(int*) per int entry over-allocates; presumably this
+      // keeps the size word-aligned for Amalloc_D -- confirm before shrinking.
+      size_t ignored_size = _body.size()*sizeof(int*);
+      int *ignored_loop_nodes = (int*)a->Amalloc_D(ignored_size);
+      // Stack capacity is an entry count, so size it by nodes, not by bytes
+      Node_Stack nstack((int)_body.size());
+      Node *cl_exit = cl->loopexit();
+
+      // First clear the entries
+      for (uint i = 0; i < _body.size(); i++) {
+        ignored_loop_nodes[i] = -1;
+      }
+
+      // Process the loop, some/all of the stack entries will not be in order, ergo
+      // need to preprocess the ignored initial state before we process the loop
+      for (uint i = 0; i < _body.size(); i++) {
+        Node* n = _body.at(i);
+        if (n == cl->incr() ||
+            n->is_reduction() ||
+            n->is_AddP() ||
+            n->is_Cmp() ||
+            n->is_IfTrue() ||
+            n->is_CountedLoop() ||
+            (n == cl_exit)) {
+          ignored_loop_nodes[i] = n->_idx;
+          continue;
+        }
+
+        if (n->is_If()) {
+          IfNode *iff = n->as_If();
+          if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
+            if (is_loop_exit(iff)) {
+              ignored_loop_nodes[i] = n->_idx;
+              continue;
+            }
+          }
+        }
+
+        if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) {
+          Node* n_tail = n->in(LoopNode::LoopBackControl);
+          if (n_tail != n->in(LoopNode::EntryControl)) {
+            if (!n_tail->is_Mem()) {
+              not_slp = true;
+              break;
+            }
+          }
+        }
+
+        // This must happen after check of phi/if
+        if (n->is_Phi() || n->is_If()) {
+          ignored_loop_nodes[i] = n->_idx;
+          continue;
+        }
+
+        if (n->is_LoadStore() || n->is_MergeMem() ||
+            (n->is_Proj() && !n->as_Proj()->is_CFG())) {
+          not_slp = true;
+          break;
+        }
+
+        if (n->is_Mem()) {
+          Node* adr = n->in(MemNode::Address);
+          Node* n_ctrl = phase->get_ctrl(adr);
+
+          // save a queue of post process nodes
+          if (n_ctrl != NULL && is_member(phase->get_loop(n_ctrl))) {
+            MemNode* current = n->as_Mem();
+            BasicType bt = current->memory_type();
+            if (is_java_primitive(bt) == false) {
+              ignored_loop_nodes[i] = n->_idx;
+              continue;
+            }
+
+            // Process the memory expression
+            int stack_idx = 0;
+            bool have_side_effects = true;
+            if (adr->is_AddP() == false) {
+              nstack.push(adr, stack_idx++);
+            } else {
+              // Mark the components of the memory operation in nstack
+              SWPointer p1(current, &sw, &nstack, true);
+              have_side_effects = p1.node_stack()->is_nonempty();
+            }
+
+            // Process the pointer stack
+            while (have_side_effects) {
+              Node* pointer_node = nstack.node();
+              for (uint j = 0; j < _body.size(); j++) {
+                Node* cur_node = _body.at(j);
+                if (cur_node == pointer_node) {
+                  ignored_loop_nodes[j] = cur_node->_idx;
+                  break;
+                }
+              }
+              nstack.pop();
+              have_side_effects = nstack.is_nonempty();
+            }
+
+            // Cleanup
+            nstack.clear();
+          }
+        }
+      }
+
+      if (not_slp == false) {
+        // Now we try to find the maximum supported consistent vector which the machine
+        // description can use
+        for (uint i = 0; i < _body.size(); i++) {
+          if (ignored_loop_nodes[i] != -1) continue;
+
+          BasicType bt;
+          Node* n = _body.at(i);
+          // Loads as well as stores carry their element type in memory_type();
+          // a sub-int load's bottom_type() would report T_INT instead.
+          if (n->is_Mem()) {
+            bt = n->as_Mem()->memory_type();
+          } else {
+            bt = n->bottom_type()->basic_type();
+          }
+          // Same filter as the memory-op pass above: only primitive element types
+          if (is_java_primitive(bt) == false) continue;
+
+          int cur_max_vector = Matcher::max_vector_size(bt);
+
+          // If a max vector exists which is not larger than _local_loop_unroll_factor
+          // stop looking, we already have the max vector to map to.
+          if (cur_max_vector <= _local_loop_unroll_factor) {
+            not_slp = true;
+#ifndef PRODUCT
+            if (TraceSuperWordLoopUnrollAnalysis) {
+              tty->print_cr("slp analysis fails: unroll limit equals max vector\n");
+            }
+#endif
+            break;
+          }
+
+          // Map the maximal common vector
+          if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
+            if (cur_max_vector < max_vector) {
+              max_vector = cur_max_vector;
+            }
+          }
+        }
+        if (not_slp == false) _local_loop_unroll_factor = max_vector;
+      }
+
+      if (not_slp) {
+        // Mark the loop as processed so that we do not try again
+        cl->mark_passed_slp();
+        cl->set_slp_max_unroll(_local_loop_unroll_factor);
+      }
+
+      // Now clean things up
+      a->Afree(ignored_loop_nodes, ignored_size);
+    }
+  }
+
+  return (not_slp == false);
+}
+
 //------------------------------policy_align-----------------------------------
 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 // expression that does the alignment.  Note that only one array base can be
@@ -1551,6 +1769,7 @@
       for (unsigned j = 1; j < def_node->req(); j++) {
         Node* in = def_node->in(j);
         if (in == phi) {
+          loop_head->mark_has_reductions();
           def_node->add_flag(Node::Flag_is_reduction);
           break;
         }
@@ -2401,7 +2620,6 @@
   // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
   // peeling.
   if (should_unroll && !should_peel) {
-    phase->mark_reductions(this);
     phase->do_unroll(this, old_new, true);
   }
 
--- old/src/share/vm/opto/loopnode.cpp 2015-05-01 18:07:00.247650500 -0700
+++ new/src/share/vm/opto/loopnode.cpp 2015-05-01 18:07:00.062650500 -0700
@@ -2408,7 +2408,7 @@
     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
       IdealLoopTree* lpt = iter.current();
       if (lpt->is_counted()) {
-        sw.transform_loop(lpt);
+        sw.transform_loop(lpt, true);
       }
     }
   }
--- old/src/share/vm/opto/loopnode.hpp 2015-05-01 18:07:04.106650500 -0700
+++ new/src/share/vm/opto/loopnode.hpp 2015-05-01 18:07:03.911650500 -0700
@@ -62,7 +62,9 @@
          HasExactTripCount=8,
          InnerLoop=16,
          PartialPeelLoop=32,
-         PartialPeelFailed=64 };
+         PartialPeelFailed=64,
+         HasReductions=128,
+         PassedSlpAnalysis=256 };
   char _unswitch_count;
   enum { _unswitch_max=3 };
 
@@ -77,6 +79,8 @@
   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
   int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
+  void mark_has_reductions() { _loop_flags |= HasReductions; }
+  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 
   int unswitch_max() { return _unswitch_max; }
   int unswitch_count() { return _unswitch_count; }
@@ -155,6 +159,10 @@
   // unroll,optimize,unroll,optimize,... is making progress
   int _node_count_before_unroll;
 
+  // If slp analysis is performed we record the maximum
+  // vector mapped unroll factor here
+  int slp_maximum_unroll_factor;
+
 public:
   CountedLoopNode( Node *entry, Node *backedge )
     : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
@@ -199,10 +207,12 @@
 
   // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
   // Aligned, may be missing it's pre-loop.
-  int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
-  int is_pre_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
-  int is_main_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
-  int is_post_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  int is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+  int is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
+  int is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
+  int is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
+  int has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
   int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
 
@@ -232,8 +242,10 @@
   void double_unrolled_count() { _unrolled_count_log2++; }
   int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
 
-  void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
-  int node_count_before_unroll() { return _node_count_before_unroll; }
+  void set_node_count_before_unroll(int ct)  { _node_count_before_unroll = ct; }
+  int  node_count_before_unroll()            { return _node_count_before_unroll; }
+  void set_slp_max_unroll(int unroll_factor) { slp_maximum_unroll_factor = unroll_factor; }
+  int  slp_max_unroll()                      { return slp_maximum_unroll_factor; }
 
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
@@ -336,6 +348,8 @@
   Node *_tail;                  // Tail of loop
   inline Node *tail();          // Handle lazy update of _tail field
   PhaseIdealLoop* _phase;
+  int _local_loop_unroll_limit;
+  int _local_loop_unroll_factor;
 
   Node_List _body;              // Loop body for inner loops
 
@@ -356,7 +370,8 @@
       _safepts(NULL),
       _required_safept(NULL),
       _allow_optimizations(true),
-      _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0)
+      _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0),
+      _local_loop_unroll_limit(0), _local_loop_unroll_factor(0)
   { }
 
   // Is 'l' a member of 'this'?
@@ -444,7 +459,11 @@
 
   // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
   // the loop is a CountedLoop and the body is small enough.
-  bool policy_unroll( PhaseIdealLoop *phase ) const;
+  bool policy_unroll( PhaseIdealLoop *phase );
+
+  // Return TRUE or FALSE if the loop analyzes to map to a maximal
+  // superword unrolling for vectorization.
+  bool policy_slp_analysis( CountedLoopNode *cl, PhaseIdealLoop *phase );
 
   // Return TRUE or FALSE if the loop should be range-check-eliminated.
   // Gather a list of IF tests that are dominated by iteration splitting;
--- old/src/share/vm/opto/superword.cpp 2015-05-01 18:07:07.885650500 -0700
+++ new/src/share/vm/opto/superword.cpp 2015-05-01 18:07:07.689650500 -0700
@@ -66,11 +66,12 @@
   _lp(NULL),              // LoopNode
   _bb(NULL),              // basic block
   _iv(NULL),              // induction var
-  _race_possible(false)   // cases where SDMU is true
+  _race_possible(false),  // cases where SDMU is true
+  _early_return(true)
 {}
 
 //------------------------------transform_loop---------------------------
-void SuperWord::transform_loop(IdealLoopTree* lpt) {
+void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
   assert(UseSuperWord, "should be");
   // Do vectors exist on this architecture?
   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
@@ -105,8 +106,10 @@
   // For now, define one block which is the entire loop body
   set_bb(cl);
 
-  assert(_packset.length() == 0, "packset must be empty");
-  SLP_extract();
+  if (do_optimization) {
+    assert(_packset.length() == 0, "packset must be empty");
+    SLP_extract();
+  }
 }
 
 //------------------------------SLP_extract---------------------------
@@ -210,12 +213,12 @@
       best_iv_adjustment = iv_adjustment;
     }
 
-    SWPointer align_to_ref_p(mem_ref, this);
+    SWPointer align_to_ref_p(mem_ref, this, NULL, false);
     // Set alignment relative to "align_to_ref" for all related memory operations.
     for (int i = memops.size() - 1; i >= 0; i--) {
       MemNode* s = memops.at(i)->as_Mem();
       if (isomorphic(s, mem_ref)) {
-        SWPointer p2(s, this);
+        SWPointer p2(s, this, NULL, false);
         if (p2.comparable(align_to_ref_p)) {
           int align = memory_alignment(s, iv_adjustment);
           set_alignment(s, align);
@@ -344,7 +347,7 @@
   // Count number of comparable memory ops
   for (uint i = 0; i < memops.size(); i++) {
     MemNode* s1 = memops.at(i)->as_Mem();
-    SWPointer p1(s1, this);
+    SWPointer p1(s1, this, NULL, false);
     // Discard if pre loop can't align this reference
     if (!ref_is_alignable(p1)) {
       *cmp_ct.adr_at(i) = 0;
@@ -353,7 +356,7 @@
     for (uint j = i+1; j < memops.size(); j++) {
       MemNode* s2 = memops.at(j)->as_Mem();
       if (isomorphic(s1, s2)) {
-        SWPointer p2(s2, this);
+        SWPointer p2(s2, this, NULL, false);
         if (p1.comparable(p2)) {
           (*cmp_ct.adr_at(i))++;
           (*cmp_ct.adr_at(j))++;
@@ -374,7 +377,7 @@
     if (s->is_Store()) {
       int vw = vector_width_in_bytes(s);
       assert(vw > 1, "sanity");
-      SWPointer p(s, this);
+      SWPointer p(s, this, NULL, false);
       if (cmp_ct.at(j) > max_ct ||
           cmp_ct.at(j) == max_ct &&
             (vw > max_vw ||
@@ -397,7 +400,7 @@
       if (s->is_Load()) {
         int vw = vector_width_in_bytes(s);
         assert(vw > 1, "sanity");
-        SWPointer p(s, this);
+        SWPointer p(s, this, NULL, false);
         if (cmp_ct.at(j) > max_ct ||
             cmp_ct.at(j) == max_ct &&
               (vw > max_vw ||
@@ -482,7 +485,7 @@
 //---------------------------get_iv_adjustment---------------------------
 // Calculate loop's iv adjustment for this memory ops.
 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
-  SWPointer align_to_ref_p(mem_ref, this);
+  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale = align_to_ref_p.scale_in_bytes();
   int vw = vector_width_in_bytes(mem_ref);
@@ -542,13 +545,13 @@
       if (_dg.dep(s1)->in_cnt() == 0) {
         _dg.make_edge(slice, s1);
       }
-      SWPointer p1(s1->as_Mem(), this);
+      SWPointer p1(s1->as_Mem(), this, NULL, false);
       bool sink_dependent = true;
       for (int k = j - 1; k >= 0; k--) {
         Node* s2 = _nlist.at(k);
         if (s1->is_Load() && s2->is_Load())
           continue;
-        SWPointer p2(s2->as_Mem(), this);
+        SWPointer p2(s2->as_Mem(), this, NULL, false);
 
         int cmp = p1.cmp(p2);
         if (SuperWordRTDepCheck &&
@@ -688,8 +691,8 @@
   if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
       _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
     return false;
-  SWPointer p1(s1->as_Mem(), this);
-  SWPointer p2(s2->as_Mem(), this);
+  SWPointer p1(s1->as_Mem(), this, NULL, false);
+  SWPointer p2(s2->as_Mem(), this, NULL, false);
   if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
   int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
   return diff == data_size(s1);
@@ -1497,13 +1500,13 @@
       if (n->is_Load()) {
         Node* ctl = n->in(MemNode::Control);
         Node* mem = first->in(MemNode::Memory);
-        SWPointer p1(n->as_Mem(), this);
+        SWPointer p1(n->as_Mem(), this, NULL, false);
         // Identify the memory dependency for the new loadVector node by
         // walking up through memory chain.
         // This is done to give flexibility to the new loadVector node so that
         // it can move above independent storeVector nodes.
         while (mem->is_StoreVector()) {
-          SWPointer p2(mem->as_Mem(), this);
+          SWPointer p2(mem->as_Mem(), this, NULL, false);
           int cmp = p1.cmp(p2);
           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
             mem = mem->in(MemNode::Memory);
@@ -2020,7 +2023,7 @@
 //------------------------------memory_alignment---------------------------
 // Alignment within a vector memory reference
 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
-  SWPointer p(s, this);
+  SWPointer p(s, this, NULL, false);
   if (!p.valid()) {
     return bottom_align;
   }
@@ -2184,7 +2187,7 @@
   Node *orig_limit = pre_opaq->original_loop_limit();
   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
 
-  SWPointer align_to_ref_p(align_to_ref, this);
+  SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
   assert(align_to_ref_p.valid(), "sanity");
 
   // Given:
@@ -2355,6 +2358,7 @@
   _lp = NULL;
   _bb = NULL;
   _iv = NULL;
+  _early_return = false;
 }
 
 //------------------------------print_packset---------------------------
@@ -2411,9 +2415,11 @@
 //==============================SWPointer===========================
 
 //----------------------------SWPointer------------------------
-SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
+SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
   _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
-  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
+  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
+  _nstack(nstack), _analyze_only(analyze_only),
+  _stack_idx(0) {
 
   Node* adr = mem->in(MemNode::Address);
   if (!adr->is_AddP()) {
@@ -2446,7 +2452,9 @@
 // the pattern match of an address expression.
 SWPointer::SWPointer(SWPointer* p) :
   _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
-  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}
+  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
+  _nstack(p->_nstack), _analyze_only(p->_analyze_only),
+  _stack_idx(p->_stack_idx) {}
 
 //------------------------scaled_iv_plus_offset--------------------
 // Match: k*iv + offset
@@ -2489,6 +2497,9 @@
     _scale = 1;
     return true;
   }
+  if (_analyze_only && (invariant(n) == false)) {
+    _nstack->push(n, _stack_idx++);
+  }
   int opc = n->Opcode();
   if (opc == Op_MulI) {
     if (n->in(1) == iv() && n->in(2)->is_Con()) {
@@ -2546,6 +2557,9 @@
     return false;
   }
   if (_invar != NULL) return false; // already have an invariant
+  if (_analyze_only && (invariant(n) == false)) {
+    _nstack->push(n, _stack_idx++);
+  }
   if (opc == Op_AddI) {
     if (n->in(2)->is_Con() && invariant(n->in(1))) {
       _negate_invar = negate;
--- old/src/share/vm/opto/superword.hpp 2015-05-01 18:07:11.616650500 -0700
+++ new/src/share/vm/opto/superword.hpp 2015-05-01 18:07:11.432650500 -0700
@@ -237,12 +237,13 @@
  public:
   SuperWord(PhaseIdealLoop* phase);
 
-  void transform_loop(IdealLoopTree* lpt);
+  void transform_loop(IdealLoopTree* lpt, bool do_optimization);
 
   // Accessors for SWPointer
   PhaseIdealLoop* phase()          { return _phase; }
   IdealLoopTree* lpt()             { return _lpt; }
   PhiNode* iv()                    { return _iv; }
+  bool early_return()              { return _early_return; }
 
  private:
   IdealLoopTree* _lpt;             // Current loop tree node
@@ -250,6 +251,7 @@
   Node*          _bb;              // Current basic block
   PhiNode*       _iv;              // Induction var
   bool           _race_possible;   // In cases where SDMU is true
+  bool           _early_return;    // True if we do not initialize
 
   // Accessors
   Arena* arena()                   { return _arena; }
@@ -434,15 +436,18 @@
 // Information about an address for dependence checking and vector alignment
 class SWPointer VALUE_OBJ_CLASS_SPEC {
  protected:
-  MemNode*   _mem;     // My memory reference node
-  SuperWord* _slp;     // SuperWord class
+  MemNode*   _mem;           // My memory reference node
+  SuperWord* _slp;           // SuperWord class
 
-  Node* _base;         // NULL if unsafe nonheap reference
-  Node* _adr;          // address pointer
-  jint  _scale;        // multipler for iv (in bytes), 0 if no loop iv
-  jint  _offset;       // constant offset (in bytes)
-  Node* _invar;        // invariant offset (in bytes), NULL if none
-  bool  _negate_invar; // if true then use: (0 - _invar)
+  Node* _base;               // NULL if unsafe nonheap reference
+  Node* _adr;                // address pointer
+  jint  _scale;              // multiplier for iv (in bytes), 0 if no loop iv
+  jint  _offset;             // constant offset (in bytes)
+  Node* _invar;              // invariant offset (in bytes), NULL if none
+  bool  _negate_invar;       // if true then use: (0 - _invar)
+  Node_Stack* _nstack;       // stack used to record a swpointer trace of variants
+  bool        _analyze_only; // Used in loop unrolling only for swpointer trace
+  uint        _stack_idx;    // Used in loop unrolling only for swpointer trace
 
   PhaseIdealLoop* phase() { return _slp->phase(); }
   IdealLoopTree*  lpt()   { return _slp->lpt(); }
@@ -469,7 +474,7 @@
     NotComparable = (Less | Greater | Equal)
   };
 
-  SWPointer(MemNode* mem, SuperWord* slp);
+  SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only);
   // Following is used to create a temporary object during
   // the pattern match of an address expression.
   SWPointer(SWPointer* p);
@@ -477,14 +482,15 @@
   bool valid()  { return _adr != NULL; }
   bool has_iv() { return _scale != 0; }
 
-  Node* base()            { return _base; }
-  Node* adr()             { return _adr; }
-  MemNode* mem()          { return _mem; }
-  int   scale_in_bytes()  { return _scale; }
-  Node* invar()           { return _invar; }
-  bool  negate_invar()    { return _negate_invar; }
-  int   offset_in_bytes() { return _offset; }
-  int   memory_size()     { return _mem->memory_size(); }
+  Node* base()             { return _base; }
+  Node* adr()              { return _adr; }
+  MemNode* mem()           { return _mem; }
+  int   scale_in_bytes()   { return _scale; }
+  Node* invar()            { return _invar; }
+  bool  negate_invar()     { return _negate_invar; }
+  int   offset_in_bytes()  { return _offset; }
+  int   memory_size()      { return _mem->memory_size(); }
+  Node_Stack* node_stack() { return _nstack; }
 
   // Comparable?
   int cmp(SWPointer& q) {