src/hotspot/share/opto/superword.cpp
Index Unified diffs Context diffs Sdiffs Frames Patch New Old Previous File Next File open Sdiff src/hotspot/share/opto

src/hotspot/share/opto/superword.cpp

Print this page




2151   Node_Stack stack(_arena, _phase->C->unique() >> 2);
2152   Node_List rpo_list;
2153   VectorSet visited(_arena);
2154   visited.set(lpt()->_head->_idx);
2155   _phase->rpo(lpt()->_head, stack, visited, rpo_list);
2156   _phase->dump(lpt(), rpo_list.size(), rpo_list );
2157   if(whole) {
2158     tty->print_cr("\n Whole loop tree");
2159     _phase->dump();
2160     tty->print_cr(" End of whole loop tree\n");
2161   }
2162 }
2163 #endif
2164 
2165 //------------------------------output---------------------------
2166 // Convert packs into vector node operations
2167 void SuperWord::output() {
2168   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2169   Compile* C = _phase->C;
2170   if (_packset.length() == 0) {

2171     // Instigate more unrolling for optimization when vectorization fails.
2172     C->set_major_progress();
2173     cl->set_notpassed_slp();
2174     cl->mark_do_unroll_only();

2175     return;
2176   }
2177 
2178 #ifndef PRODUCT
2179   if (TraceLoopOpts) {
2180     tty->print("SuperWord::output    ");
2181     lpt()->dump_head();
2182   }
2183 #endif
2184 
2185   if (cl->is_main_loop()) {
2186     // MUST ENSURE main loop's initial value is properly aligned:
2187     //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
2188 
2189     align_initial_loop_index(align_to_ref());
2190 
2191     // Insert extract (unpack) operations for scalar uses
2192     for (int i = 0; i < _packset.length(); i++) {
2193       insert_extracts(_packset.at(i));
2194     }


2400         // other vector size have reduced values for predicated data mapping.
2401         if (vlen_in_bytes != (uint)MaxVectorSize) {
2402           return;
2403         }
2404       }
2405 
2406       if (vlen_in_bytes >= max_vlen_in_bytes && vlen > max_vlen) {
2407         max_vlen = vlen;
2408         max_vlen_in_bytes = vlen_in_bytes;
2409       }
2410 #ifdef ASSERT
2411       if (TraceNewVectors) {
2412         tty->print("new Vector node: ");
2413         vn->dump();
2414       }
2415 #endif
2416     }
2417   }//for (int i = 0; i < _block.length(); i++)
2418 
2419   C->set_max_vector_size(max_vlen_in_bytes);



2420 
2421   if (SuperWordLoopUnrollAnalysis) {
2422     if (cl->has_passed_slp()) {
2423       uint slp_max_unroll_factor = cl->slp_max_unroll();
2424       if (slp_max_unroll_factor == max_vlen) {
2425         if (TraceSuperWordLoopUnrollAnalysis) {
2426           tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
2427         }
2428 
2429         // For atomic unrolled loops which are vector mapped, instigate more unrolling
2430         cl->set_notpassed_slp();
2431         if (cl->is_main_loop()) {
2432           // if vector resources are limited, do not allow additional unrolling, also
2433           // do not unroll more on pure vector loops which were not reduced so that we can
2434           // program the post loop to single iteration execution.
2435           if (FLOATPRESSURE > 8) {
2436             C->set_major_progress();
2437             cl->mark_do_unroll_only();
2438           }
2439         }
2440 
2441         if (do_reserve_copy()) {
2442           cl->mark_loop_vectorized();
2443           if (can_process_post_loop) {
2444             // Now create the difference of trip and limit and use it as our mask index.
2445             // Note: We limited the unroll of the vectorized loop so that
2446             //       only vlen-1 size iterations can remain to be mask programmed.
2447             Node *incr = cl->incr();
2448             SubINode *index = new SubINode(cl->limit(), cl->init_trip());
2449             _igvn.register_new_node_with_optimizer(index);
2450             SetVectMaskINode  *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
2451             _igvn.register_new_node_with_optimizer(mask);
2452             // make this a single iteration loop
2453             AddINode *new_incr = new AddINode(incr->in(1), mask);
2454             _igvn.register_new_node_with_optimizer(new_incr);
2455             _phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
2456             _igvn.replace_node(incr, new_incr);
2457             cl->mark_is_multiversioned();
2458             cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
2459           }
2460         }
2461       }
2462     }




2151   Node_Stack stack(_arena, _phase->C->unique() >> 2);
2152   Node_List rpo_list;
2153   VectorSet visited(_arena);
2154   visited.set(lpt()->_head->_idx);
2155   _phase->rpo(lpt()->_head, stack, visited, rpo_list);
2156   _phase->dump(lpt(), rpo_list.size(), rpo_list );
2157   if(whole) {
2158     tty->print_cr("\n Whole loop tree");
2159     _phase->dump();
2160     tty->print_cr(" End of whole loop tree\n");
2161   }
2162 }
2163 #endif
2164 
2165 //------------------------------output---------------------------
2166 // Convert packs into vector node operations
2167 void SuperWord::output() {
2168   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
2169   Compile* C = _phase->C;
2170   if (_packset.length() == 0) {
2171     if (cl->is_main_loop()) {
2172       // Instigate more unrolling for optimization when vectorization fails.
2173       C->set_major_progress();
2174       cl->set_notpassed_slp();
2175       cl->mark_do_unroll_only();
2176     }
2177     return;
2178   }
2179 
2180 #ifndef PRODUCT
2181   if (TraceLoopOpts) {
2182     tty->print("SuperWord::output    ");
2183     lpt()->dump_head();
2184   }
2185 #endif
2186 
2187   if (cl->is_main_loop()) {
2188     // MUST ENSURE main loop's initial value is properly aligned:
2189     //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
2190 
2191     align_initial_loop_index(align_to_ref());
2192 
2193     // Insert extract (unpack) operations for scalar uses
2194     for (int i = 0; i < _packset.length(); i++) {
2195       insert_extracts(_packset.at(i));
2196     }


2402         // other vector size have reduced values for predicated data mapping.
2403         if (vlen_in_bytes != (uint)MaxVectorSize) {
2404           return;
2405         }
2406       }
2407 
2408       if (vlen_in_bytes >= max_vlen_in_bytes && vlen > max_vlen) {
2409         max_vlen = vlen;
2410         max_vlen_in_bytes = vlen_in_bytes;
2411       }
2412 #ifdef ASSERT
2413       if (TraceNewVectors) {
2414         tty->print("new Vector node: ");
2415         vn->dump();
2416       }
2417 #endif
2418     }
2419   }//for (int i = 0; i < _block.length(); i++)
2420 
2421   C->set_max_vector_size(max_vlen_in_bytes);
2422   if (max_vlen_in_bytes > 0) {
2423     cl->mark_loop_vectorized();
2424   }
2425 
2426   if (SuperWordLoopUnrollAnalysis) {
2427     if (cl->has_passed_slp()) {
2428       uint slp_max_unroll_factor = cl->slp_max_unroll();
2429       if (slp_max_unroll_factor == max_vlen) {
2430         if (TraceSuperWordLoopUnrollAnalysis) {
2431           tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
2432         }
2433 
2434         // For atomic unrolled loops which are vector mapped, instigate more unrolling
2435         cl->set_notpassed_slp();
2436         if (cl->is_main_loop()) {
2437           // if vector resources are limited, do not allow additional unrolling, also
2438           // do not unroll more on pure vector loops which were not reduced so that we can
2439           // program the post loop to single iteration execution.
2440           if (FLOATPRESSURE > 8) {
2441             C->set_major_progress();
2442             cl->mark_do_unroll_only();
2443           }
2444         }
2445 
2446         if (do_reserve_copy()) {

2447           if (can_process_post_loop) {
2448             // Now create the difference of trip and limit and use it as our mask index.
2449             // Note: We limited the unroll of the vectorized loop so that
2450             //       only vlen-1 size iterations can remain to be mask programmed.
2451             Node *incr = cl->incr();
2452             SubINode *index = new SubINode(cl->limit(), cl->init_trip());
2453             _igvn.register_new_node_with_optimizer(index);
2454             SetVectMaskINode  *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
2455             _igvn.register_new_node_with_optimizer(mask);
2456             // make this a single iteration loop
2457             AddINode *new_incr = new AddINode(incr->in(1), mask);
2458             _igvn.register_new_node_with_optimizer(new_incr);
2459             _phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
2460             _igvn.replace_node(incr, new_incr);
2461             cl->mark_is_multiversioned();
2462             cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
2463           }
2464         }
2465       }
2466     }


src/hotspot/share/opto/superword.cpp
Index Unified diffs Context diffs Sdiffs Frames Patch New Old Previous File Next File