hotspot Sdiff src/share/vm/opto

src/share/vm/opto/superword.cpp

rev 10955 : undo
rev 10956 : 8154939: 8153998 broke vectorization on aarch64
Summary: code assumes SuperWordLoopUnrollAnalysis is on, which is not true on aarch64
Reviewed-by:
rev 10962 : vectorization fix
rev 10963 : vectorization fix exp

 181 }
 182 
 183 //------------------------------early unrolling analysis------------------------------
 184 void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
 185   bool is_slp = true;
 186   ResourceMark rm;
 187   size_t ignored_size = lpt()->_body.size();
 188   int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
 189   Node_Stack nstack((int)ignored_size);
 190   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
 191   Node *cl_exit = cl->loopexit();
 192   int rpo_idx = _post_block.length();
 193 
 194   assert(rpo_idx == 0, "post loop block is empty");
 195 
 196   // First clear the entries
 197   for (uint i = 0; i < lpt()->_body.size(); i++) {
 198     ignored_loop_nodes[i] = -1;
 199   }
 200 
 201   int max_vector = Matcher::max_vector_size(T_INT);
 202   bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
 203 
 204   // Process the loop, some/all of the stack entries will not be in order, ergo
 205   // need to preprocess the ignored initial state before we process the loop
 206   for (uint i = 0; i < lpt()->_body.size(); i++) {
 207     Node* n = lpt()->_body.at(i);
 208     if (n == cl->incr() ||
 209       n->is_reduction() ||
 210       n->is_AddP() ||
 211       n->is_Cmp() ||
 212       n->is_IfTrue() ||
 213       n->is_CountedLoop() ||
 214       (n == cl_exit)) {
 215       ignored_loop_nodes[i] = n->_idx;
 216       continue;
 217     }
 218 
 219     if (n->is_If()) {
 220       IfNode *iff = n->as_If();
 221       if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {

 281         // Process the pointer stack
 282         while (have_side_effects) {
 283           Node* pointer_node = nstack.node();
 284           for (uint j = 0; j < lpt()->_body.size(); j++) {
 285             Node* cur_node = lpt()->_body.at(j);
 286             if (cur_node == pointer_node) {
 287               ignored_loop_nodes[j] = cur_node->_idx;
 288               break;
 289             }
 290           }
 291           nstack.pop();
 292           have_side_effects = nstack.is_nonempty();
 293         }
 294       }
 295     }
 296   }
 297 
 298   if (is_slp) {
 299     // Now we try to find the maximum supported consistent vector which the machine
 300     // description can use




 301     bool small_basic_type = false;
 302     for (uint i = 0; i < lpt()->_body.size(); i++) {
 303       if (ignored_loop_nodes[i] != -1) continue;
 304 
 305       BasicType bt;
 306       Node* n = lpt()->_body.at(i);
 307       if (n->is_Mem()) {
 308         bt = n->as_Mem()->memory_type();
 309       } else {
 310         bt = n->bottom_type()->basic_type();
 311       }
 312 
 313       if (post_loop_allowed) {
 314         if (!small_basic_type) {
 315           switch (bt) {
 316           case T_CHAR:
 317           case T_BYTE:
 318           case T_SHORT:
 319             small_basic_type = true;
 320             break;
 321 
 322           case T_LONG:
 323             // TODO: Remove when support completed for mask context with LONG.
 324             //       Support needs to be augmented for logical qword operations, currently we map to dword
 325             //       buckets for vectors on logicals as these were legacy.
 326             small_basic_type = true;
 327             break;
 328           }
 329         }
 330       }
 331 
 332       if (is_java_primitive(bt) == false) continue;
 333 
 334       int cur_max_vector = Matcher::max_vector_size(bt);


























 335 
 336       // If a max vector exists which is not larger than _local_loop_unroll_factor
 337       // stop looking, we already have the max vector to map to.
 338       if (cur_max_vector < local_loop_unroll_factor) {
 339         is_slp = false;
 340         if (TraceSuperWordLoopUnrollAnalysis) {
 341           tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
 342         }
 343         break;
 344       }
 345 
 346       // Map the maximal common vector
 347       if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
 348         if (cur_max_vector < max_vector) {
 349           max_vector = cur_max_vector;
 350         }
 351 
 352         // We only process post loops on predicated targets where we want to
 353         // mask map the loop to a single iteration
 354         if (post_loop_allowed) {
 355           _post_block.at_put_grow(rpo_idx++, n);
 356         }
 357       }
 358     }
 359     if (is_slp) {
 360       local_loop_unroll_factor = max_vector;
 361       cl->mark_passed_slp();
 362     }
 363     cl->mark_was_slp();
 364     if (cl->is_main_loop()) {
 365       cl->set_slp_max_unroll(local_loop_unroll_factor);
 366     } else if (post_loop_allowed) {
 367       if (!small_basic_type) {
 368         // avoid replication context for small basic types in programmable masked loops
 369         cl->set_slp_max_unroll(local_loop_unroll_factor);
 370       }
 371     }
 372   }
 373 }
 374 
 375 //------------------------------SLP_extract---------------------------
 376 // Extract the superword level parallelism

 181 }
 182 
 183 //------------------------------early unrolling analysis------------------------------
 184 void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
 185   bool is_slp = true;
 186   ResourceMark rm;
 187   size_t ignored_size = lpt()->_body.size();
 188   int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
 189   Node_Stack nstack((int)ignored_size);
 190   CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
 191   Node *cl_exit = cl->loopexit();
 192   int rpo_idx = _post_block.length();
 193 
 194   assert(rpo_idx == 0, "post loop block is empty");
 195 
 196   // First clear the entries
 197   for (uint i = 0; i < lpt()->_body.size(); i++) {
 198     ignored_loop_nodes[i] = -1;
 199   }
 200 
 201   int max_vector = Matcher::max_vector_size(T_BYTE);
 202   bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
 203 
 204   // Process the loop, some/all of the stack entries will not be in order, ergo
 205   // need to preprocess the ignored initial state before we process the loop
 206   for (uint i = 0; i < lpt()->_body.size(); i++) {
 207     Node* n = lpt()->_body.at(i);
 208     if (n == cl->incr() ||
 209       n->is_reduction() ||
 210       n->is_AddP() ||
 211       n->is_Cmp() ||
 212       n->is_IfTrue() ||
 213       n->is_CountedLoop() ||
 214       (n == cl_exit)) {
 215       ignored_loop_nodes[i] = n->_idx;
 216       continue;
 217     }
 218 
 219     if (n->is_If()) {
 220       IfNode *iff = n->as_If();
 221       if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {

 281         // Process the pointer stack
 282         while (have_side_effects) {
 283           Node* pointer_node = nstack.node();
 284           for (uint j = 0; j < lpt()->_body.size(); j++) {
 285             Node* cur_node = lpt()->_body.at(j);
 286             if (cur_node == pointer_node) {
 287               ignored_loop_nodes[j] = cur_node->_idx;
 288               break;
 289             }
 290           }
 291           nstack.pop();
 292           have_side_effects = nstack.is_nonempty();
 293         }
 294       }
 295     }
 296   }
 297 
 298   if (is_slp) {
 299     // Now we try to find the maximum supported consistent vector which the machine
 300     // description can use
 301     int implemented[T_LONG+1];
 302     for (uint i = 0; i <= T_LONG; i++) {
 303       implemented[i] = -1;
 304     }
 305     bool small_basic_type = false;
 306     for (uint i = 0; i < lpt()->_body.size(); i++) {
 307       if (ignored_loop_nodes[i] != -1) continue;
 308 
 309       BasicType bt;
 310       Node* n = lpt()->_body.at(i);
 311       if (n->is_Mem()) {
 312         bt = n->as_Mem()->memory_type();
 313       } else {
 314         bt = n->bottom_type()->basic_type();
 315       }
 316 
 317       if (post_loop_allowed) {
 318         if (!small_basic_type) {
 319           switch (bt) {
 320           case T_CHAR:
 321           case T_BYTE:
 322           case T_SHORT:
 323             small_basic_type = true;
 324             break;
 325 
 326           case T_LONG:
 327             // TODO: Remove when support completed for mask context with LONG.
 328             //       Support needs to be augmented for logical qword operations, currently we map to dword
 329             //       buckets for vectors on logicals as these were legacy.
 330             small_basic_type = true;
 331             break;
 332           }
 333         }
 334       }
 335 
 336       if (is_java_primitive(bt) == false) continue;
 337 
 338       int cur_max_vector = Matcher::max_vector_size(bt);
 339       bool impl = VectorNode::implemented(n->Opcode(), cur_max_vector, bt);
 340 
 341       if (impl) {
 342         // We only process post loops on predicated targets where we want to
 343         // mask map the loop to a single iteration
 344         if (post_loop_allowed) {
 345           _post_block.at_put_grow(rpo_idx++, n);
 346         }
 347       }
 348 
 349       assert(bt <= T_LONG, "bad basic type");
 350       if (implemented[bt] == 0) {
 351         continue;
 352       }
 353 
 354       implemented[bt] = (int)impl;
 355     }
 356 
 357     is_slp = false;
 358     for (uint i = T_BOOLEAN; i <= T_LONG; i++) {
 359       BasicType bt = (BasicType)i;
 360       if (implemented[bt] == -1 || implemented[bt] == 0) {
 361         continue;
 362       }
 363 
 364       int cur_max_vector = Matcher::max_vector_size(bt);
 365 
 366       // If a max vector exists which is not larger than _local_loop_unroll_factor
 367       // stop looking, we already have the max vector to map to.
 368       if (cur_max_vector < local_loop_unroll_factor) {
 369         is_slp = false;
 370         if (TraceSuperWordLoopUnrollAnalysis) {
 371           tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
 372         }
 373         break;
 374       }
 375 
 376       is_slp = true;
 377 
 378       if (cur_max_vector < max_vector) {
 379         max_vector = cur_max_vector;







 380       }
 381     }
 382     if (is_slp) {
 383       local_loop_unroll_factor = max_vector;
 384       cl->mark_passed_slp();
 385     }
 386     cl->mark_was_slp();
 387     if (cl->is_main_loop()) {
 388       cl->set_slp_max_unroll(local_loop_unroll_factor);
 389     } else if (post_loop_allowed) {
 390       if (!small_basic_type) {
 391         // avoid replication context for small basic types in programmable masked loops
 392         cl->set_slp_max_unroll(local_loop_unroll_factor);
 393       }
 394     }
 395   }
 396 }
 397 
 398 //------------------------------SLP_extract---------------------------
 399 // Extract the superword level parallelism

< prev index next >