181 }
182
183 //------------------------------early unrolling analysis------------------------------
// Early unrolling analysis for the counted loop at lpt()->_head.
// Visible steps: (1) mark loop-body nodes that are excluded from the vector-size
// decision, (2) walk the remaining nodes to find the smallest max vector size
// among the node types the matcher implements, (3) record the outcome on the
// CountedLoopNode.
// local_loop_unroll_factor is an in/out reference: it is compared against each
// type's max vector size and is overwritten with max_vector when is_slp is
// still true at the end.
184 void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
185 bool is_slp = true;
186 ResourceMark rm;
// One slot per loop-body node: -1 means "not ignored"; otherwise the slot holds
// the _idx of the ignored node at that body position.
187 size_t ignored_size = lpt()->_body.size();
188 int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
189 Node_Stack nstack((int)ignored_size);
190 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
191 Node *cl_exit = cl->loopexit();
// Next free index in _post_block; asserted below to be 0 on entry.
192 int rpo_idx = _post_block.length();
193
194 assert(rpo_idx == 0, "post loop block is empty");
195
196 // First clear the entries
197 for (uint i = 0; i < lpt()->_body.size(); i++) {
198 ignored_loop_nodes[i] = -1;
199 }
200
// Starting upper bound for the common vector size; only ever lowered below.
201 int max_vector = Matcher::max_vector_size(T_INT);
// Post-loop handling needs all three: the PostLoopMultiversioning flag,
// predicated-vector support in the matcher, and this loop being a post loop.
202 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
203
204 // Process the loop, some/all of the stack entries will not be in order, ergo
205 // need to preprocess the ignored initial state before we process the loop
206 for (uint i = 0; i < lpt()->_body.size(); i++) {
207 Node* n = lpt()->_body.at(i);
// The loop increment, reductions, address adds, compares, IfTrue projections,
// the loop head and the loop exit are marked ignored up front.
208 if (n == cl->incr() ||
209 n->is_reduction() ||
210 n->is_AddP() ||
211 n->is_Cmp() ||
212 n->is_IfTrue() ||
213 n->is_CountedLoop() ||
214 (n == cl_exit)) {
215 ignored_loop_nodes[i] = n->_idx;
216 continue;
217 }
218
219 if (n->is_If()) {
220 IfNode *iff = n->as_If();
221 if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
// NOTE(review): the embedded numbering jumps from 221 to 281 here. The
// intervening statements, including the declaration of have_side_effects and
// whatever pushes onto nstack, are not visible in this excerpt.
281 // Process the pointer stack
// Drain nstack: each popped node is looked up by identity in the loop body and
// its slot is marked ignored. The inner scan is linear in the body size.
282 while (have_side_effects) {
283 Node* pointer_node = nstack.node();
284 for (uint j = 0; j < lpt()->_body.size(); j++) {
285 Node* cur_node = lpt()->_body.at(j);
286 if (cur_node == pointer_node) {
287 ignored_loop_nodes[j] = cur_node->_idx;
288 break;
289 }
290 }
291 nstack.pop();
292 have_side_effects = nstack.is_nonempty();
293 }
294 }
295 }
296 }
297
// is_slp is not modified in the visible code above; presumably the elided
// section can clear it -- TODO confirm.
298 if (is_slp) {
299 // Now we try to find the maximum supported consistent vector which the machine
300 // description can use
301 bool small_basic_type = false;
302 for (uint i = 0; i < lpt()->_body.size(); i++) {
303 if (ignored_loop_nodes[i] != -1) continue;
304
// Memory nodes are classified by the type they access; every other node by
// its bottom type.
305 BasicType bt;
306 Node* n = lpt()->_body.at(i);
307 if (n->is_Mem()) {
308 bt = n->as_Mem()->memory_type();
309 } else {
310 bt = n->bottom_type()->basic_type();
311 }
312
// Only for permitted post loops: latch small_basic_type once any non-ignored
// node has type T_CHAR, T_BYTE, T_SHORT or T_LONG. The flag is never reset.
313 if (post_loop_allowed) {
314 if (!small_basic_type) {
315 switch (bt) {
316 case T_CHAR:
317 case T_BYTE:
318 case T_SHORT:
319 small_basic_type = true;
320 break;
321
322 case T_LONG:
323 // TODO: Remove when support completed for mask context with LONG.
324 // Support needs to be augmented for logical qword operations, currently we map to dword
325 // buckets for vectors on logicals as these were legacy.
326 small_basic_type = true;
327 break;
328 }
329 }
330 }
331
// Nodes whose type is not a Java primitive take no part in the size decision.
332 if (is_java_primitive(bt) == false) continue;
333
334 int cur_max_vector = Matcher::max_vector_size(bt);
335
336 // If a max vector exists which is not larger than _local_loop_unroll_factor
337 // stop looking, we already have the max vector to map to.
// NOTE(review): the test below abandons SLP when this type's max vector size
// is smaller than local_loop_unroll_factor, and it runs before the
// VectorNode::implemented() check. The comment above reads differently --
// confirm intent.
338 if (cur_max_vector < local_loop_unroll_factor) {
339 is_slp = false;
340 if (TraceSuperWordLoopUnrollAnalysis) {
341 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
342 }
343 break;
344 }
345
346 // Map the maximal common vector
// Only nodes whose opcode is implemented at this size and type lower
// max_vector (running minimum) and are eligible for _post_block.
347 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
348 if (cur_max_vector < max_vector) {
349 max_vector = cur_max_vector;
350 }
351
352 // We only process post loops on predicated targets where we want to
353 // mask map the loop to a single iteration
354 if (post_loop_allowed) {
355 _post_block.at_put_grow(rpo_idx++, n);
356 }
357 }
358 }
// On success the caller's unroll factor becomes the common max vector size.
359 if (is_slp) {
360 local_loop_unroll_factor = max_vector;
361 cl->mark_passed_slp();
362 }
// mark_was_slp() is called whether or not the scan above succeeded, but only
// inside the enclosing if (is_slp) taken at line 298.
363 cl->mark_was_slp();
// Main loops always record the (possibly updated) unroll factor; permitted
// post loops record it only when no small/long basic type was seen.
364 if (cl->is_main_loop()) {
365 cl->set_slp_max_unroll(local_loop_unroll_factor);
366 } else if (post_loop_allowed) {
367 if (!small_basic_type) {
368 // avoid replication context for small basic types in programmable masked loops
369 cl->set_slp_max_unroll(local_loop_unroll_factor);
370 }
371 }
372 }
373 }
374
375 //------------------------------SLP_extract---------------------------
376 // Extract the superword level parallelism
|
181 }
182
183 //------------------------------early unrolling analysis------------------------------
// Early unrolling analysis for the counted loop at lpt()->_head.
// Visible steps: (1) mark loop-body nodes that are excluded from the vector-size
// decision, (2) classify each remaining node's basic type as vectorizable or
// not in the implemented[] table, (3) take the smallest max vector size over
// the vectorizable types, (4) record the outcome on the CountedLoopNode.
// local_loop_unroll_factor is an in/out reference: it is compared against each
// type's max vector size and is overwritten with max_vector when is_slp is
// true at the end.
184 void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
185 bool is_slp = true;
186 ResourceMark rm;
// One slot per loop-body node: -1 means "not ignored"; otherwise the slot holds
// the _idx of the ignored node at that body position.
187 size_t ignored_size = lpt()->_body.size();
188 int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
189 Node_Stack nstack((int)ignored_size);
190 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
191 Node *cl_exit = cl->loopexit();
// Next free index in _post_block; asserted below to be 0 on entry.
192 int rpo_idx = _post_block.length();
193
194 assert(rpo_idx == 0, "post loop block is empty");
195
196 // First clear the entries
197 for (uint i = 0; i < lpt()->_body.size(); i++) {
198 ignored_loop_nodes[i] = -1;
199 }
200
// Starting upper bound for the common vector size, taken from T_BYTE; only
// ever lowered below.
201 int max_vector = Matcher::max_vector_size(T_BYTE);
// Post-loop handling needs all three: the PostLoopMultiversioning flag,
// predicated-vector support in the matcher, and this loop being a post loop.
202 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
203
204 // Process the loop, some/all of the stack entries will not be in order, ergo
205 // need to preprocess the ignored initial state before we process the loop
206 for (uint i = 0; i < lpt()->_body.size(); i++) {
207 Node* n = lpt()->_body.at(i);
// The loop increment, reductions, address adds, compares, IfTrue projections,
// the loop head and the loop exit are marked ignored up front.
208 if (n == cl->incr() ||
209 n->is_reduction() ||
210 n->is_AddP() ||
211 n->is_Cmp() ||
212 n->is_IfTrue() ||
213 n->is_CountedLoop() ||
214 (n == cl_exit)) {
215 ignored_loop_nodes[i] = n->_idx;
216 continue;
217 }
218
219 if (n->is_If()) {
220 IfNode *iff = n->as_If();
221 if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
// NOTE(review): the embedded numbering jumps from 221 to 281 here. The
// intervening statements, including the declaration of have_side_effects and
// whatever pushes onto nstack, are not visible in this excerpt.
281 // Process the pointer stack
// Drain nstack: each popped node is looked up by identity in the loop body and
// its slot is marked ignored. The inner scan is linear in the body size.
282 while (have_side_effects) {
283 Node* pointer_node = nstack.node();
284 for (uint j = 0; j < lpt()->_body.size(); j++) {
285 Node* cur_node = lpt()->_body.at(j);
286 if (cur_node == pointer_node) {
287 ignored_loop_nodes[j] = cur_node->_idx;
288 break;
289 }
290 }
291 nstack.pop();
292 have_side_effects = nstack.is_nonempty();
293 }
294 }
295 }
296 }
297
// is_slp is not modified in the visible code above; presumably the elided
// section can clear it -- TODO confirm.
298 if (is_slp) {
299 // Now we try to find the maximum supported consistent vector which the machine
300 // description can use
// Per-basic-type tri-state, indexed by BasicType value up to T_LONG:
//   -1  no non-ignored node of this type has been seen,
//    0  at least one node of this type was reported unimplemented (sticky),
//    1  every node of this type seen so far was reported implemented.
301 int implemented[T_LONG+1];
302 for (uint i = 0; i <= T_LONG; i++) {
303 implemented[i] = -1;
304 }
305 bool small_basic_type = false;
// Pass 1: classify every non-ignored node by basic type.
306 for (uint i = 0; i < lpt()->_body.size(); i++) {
307 if (ignored_loop_nodes[i] != -1) continue;
308
// Memory nodes are classified by the type they access; every other node by
// its bottom type.
309 BasicType bt;
310 Node* n = lpt()->_body.at(i);
311 if (n->is_Mem()) {
312 bt = n->as_Mem()->memory_type();
313 } else {
314 bt = n->bottom_type()->basic_type();
315 }
316
// Only for permitted post loops: latch small_basic_type once any non-ignored
// node has type T_CHAR, T_BYTE, T_SHORT or T_LONG. The flag is never reset.
317 if (post_loop_allowed) {
318 if (!small_basic_type) {
319 switch (bt) {
320 case T_CHAR:
321 case T_BYTE:
322 case T_SHORT:
323 small_basic_type = true;
324 break;
325
326 case T_LONG:
327 // TODO: Remove when support completed for mask context with LONG.
328 // Support needs to be augmented for logical qword operations, currently we map to dword
329 // buckets for vectors on logicals as these were legacy.
330 small_basic_type = true;
331 break;
332 }
333 }
334 }
335
// Nodes whose type is not a Java primitive take no part in the size decision.
336 if (is_java_primitive(bt) == false) continue;
337
338 int cur_max_vector = Matcher::max_vector_size(bt);
339 bool impl = VectorNode::implemented(n->Opcode(), cur_max_vector, bt);
340
// _post_block collection depends only on this node's own result, not on the
// sticky per-type state tracked below.
341 if (impl) {
342 // We only process post loops on predicated targets where we want to
343 // mask map the loop to a single iteration
344 if (post_loop_allowed) {
345 _post_block.at_put_grow(rpo_idx++, n);
346 }
347 }
348
// bt indexes implemented[], which has T_LONG+1 entries; the assert guards
// that bound.
349 assert(bt <= T_LONG, "bad basic type");
// A type already marked 0 stays 0: one unimplemented operation disqualifies
// the whole type, regardless of later nodes.
350 if (implemented[bt] == 0) {
351 continue;
352 }
353
354 implemented[bt] = (int)impl;
355 }
356
// Pass 2: is_slp is re-derived from scratch. It ends up true only if at least
// one type is marked 1 and no such type has a max vector size below
// local_loop_unroll_factor.
357 is_slp = false;
358 for (uint i = T_BOOLEAN; i <= T_LONG; i++) {
359 BasicType bt = (BasicType)i;
// Skip types that were never seen (-1) or that have an unimplemented op (0).
360 if (implemented[bt] == -1 || implemented[bt] == 0) {
361 continue;
362 }
363
364 int cur_max_vector = Matcher::max_vector_size(bt);
365
366 // If a max vector exists which is not larger than _local_loop_unroll_factor
367 // stop looking, we already have the max vector to map to.
// NOTE(review): the test below abandons SLP when this type's max vector size
// is smaller than local_loop_unroll_factor; the comment above reads
// differently -- confirm intent.
368 if (cur_max_vector < local_loop_unroll_factor) {
369 is_slp = false;
370 if (TraceSuperWordLoopUnrollAnalysis) {
371 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
372 }
373 break;
374 }
375
376 is_slp = true;
377
// Running minimum over the fully implemented types.
378 if (cur_max_vector < max_vector) {
379 max_vector = cur_max_vector;
380 }
381 }
// On success the caller's unroll factor becomes the common max vector size.
382 if (is_slp) {
383 local_loop_unroll_factor = max_vector;
384 cl->mark_passed_slp();
385 }
// mark_was_slp() is called whether or not pass 2 succeeded, but only inside
// the enclosing if (is_slp) taken at line 298.
386 cl->mark_was_slp();
// Main loops always record the (possibly updated) unroll factor; permitted
// post loops record it only when no small/long basic type was seen.
387 if (cl->is_main_loop()) {
388 cl->set_slp_max_unroll(local_loop_unroll_factor);
389 } else if (post_loop_allowed) {
390 if (!small_basic_type) {
391 // avoid replication context for small basic types in programmable masked loops
392 cl->set_slp_max_unroll(local_loop_unroll_factor);
393 }
394 }
395 }
396 }
397
398 //------------------------------SLP_extract---------------------------
399 // Extract the superword level parallelism
|