186
187
188 // ========================= SuperWord =====================
189
190 // -----------------------------SWNodeInfo---------------------------------
191 // Per node info needed by SuperWord
192 class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
193 public:
194 int _alignment; // memory alignment for a node
195 int _depth; // Max expression (DAG) depth from block start
196 const Type* _velt_type; // vector element type
197 Node_List* _my_pack; // pack containing this node
198
199 SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
200 static const SWNodeInfo initial;
201 };
202
203 // -----------------------------SuperWord---------------------------------
204 // Transforms scalar operations into packed (superword) operations.
205 class SuperWord : public ResourceObj {
206 private:
207 PhaseIdealLoop* _phase;
208 Arena* _arena;
209 PhaseIterGVN &_igvn;
210
211 enum consts { top_align = -1, bottom_align = -666 };
212
213 GrowableArray<Node_List*> _packset; // Packs for the current block
214
215 GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
216
217 GrowableArray<Node*> _block; // Nodes in current block
218 GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
219 GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
220 GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
221 GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi
222 GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi
223 GrowableArray<SWNodeInfo> _node_info; // Info needed per node
224 CloneMap& _clone_map; // map of nodes created in cloning
225
228 GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
229
230 DepGraph _dg; // Dependence graph
231
232 // Scratch pads
233 VectorSet _visited; // Visited set
234 VectorSet _post_visited; // Post-visited set
235 Node_Stack _n_idx_list; // List of (node,index) pairs
236 GrowableArray<Node*> _nlist; // List of nodes
237 GrowableArray<Node*> _stk; // Stack of nodes
238
239 public:
240 SuperWord(PhaseIdealLoop* phase);
241
242 void transform_loop(IdealLoopTree* lpt);
243
244 // Accessors for SWPointer
245 PhaseIdealLoop* phase() { return _phase; }
246 IdealLoopTree* lpt() { return _lpt; }
247 PhiNode* iv() { return _iv; }
248
249 private:
250 IdealLoopTree* _lpt; // Current loop tree node
251 LoopNode* _lp; // Current LoopNode
252 Node* _bb; // Current basic block
253 PhiNode* _iv; // Induction var
254 bool _race_possible; // In cases where SDMU is true
255 bool _do_vector_loop; // whether to do vectorization/simd style
256 bool _vector_loop_debug; // provide more printing in debug mode
257 int _num_work_vecs; // Number of non memory vector operations
258 int _num_reductions; // Number of reduction expressions applied
259 int _ii_first; // generation with direct deps from mem phi
260 int _ii_last; // generation with direct deps to mem phi
261 GrowableArray<int> _ii_order;
262
263 // Accessors
264 Arena* arena() { return _arena; }
265
266 Node* bb() { return _bb; }
267 void set_bb(Node* bb) { _bb = bb; }
268
269 void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }
270
271 LoopNode* lp() { return _lp; }
272 void set_lp(LoopNode* lp) { _lp = lp;
273 _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
274 int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
275
276 int vector_width(Node* n) {
277 BasicType bt = velt_basic_type(n);
278 return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
279 }
280 int vector_width_in_bytes(Node* n) {
281 BasicType bt = velt_basic_type(n);
304 void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
305
306 // memory alignment for a node
307 int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
308 void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
309
310 // Max expression (DAG) depth from beginning of the block for each node
311 int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; }
312 void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }
313
314 // vector element type
315 const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
316 BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
317 void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
318 bool same_velt_type(Node* n1, Node* n2);
319
320 // my_pack
321 Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
322 void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
323
324 // methods
325
326 // Extract the superword level parallelism
327 void SLP_extract();
328 // Find the adjacent memory references and create pack pairs for them.
329 void find_adjacent_refs();
330 // Find a memory reference to align the loop induction variable to.
331 MemNode* find_align_to_ref(Node_List &memops);
332 // Calculate loop's iv adjustment for this memory ops.
333 int get_iv_adjustment(MemNode* mem);
334 // Can the preloop align the reference to position zero in the vector?
335 bool ref_is_alignable(SWPointer& p);
336 // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only)
337 bool hoist_loads_in_graph();
338 // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi
339 // Return false if failed.
340 Node* find_phi_for_mem_dep(LoadNode* ld);
341 // Return same node but from the first generation. Return 0, if not found
342 Node* first_node(Node* nd);
343 // Return same node as this but from the last generation. Return 0, if not found
344 Node* last_node(Node* n);
345 // Mark nodes belonging to first and last generation,
346 // returns first generation index or -1 if vectorization/simd is impossible
347 int mark_generations();
348 // swapping inputs of commutative instruction (Add or Mul)
349 bool fix_commutative_inputs(Node* gold, Node* fix);
350 // make packs forcefully (in _do_vector_loop only)
351 bool pack_parallel();
352 // Construct dependency graph.
353 void dependence_graph();
354 // Return a memory slice (node list) in predecessor order starting at "start"
355 void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
356 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
357 bool stmts_can_pack(Node* s1, Node* s2, int align);
358 // Does s exist in a pack at position pos?
359 bool exists_at(Node* s, uint pos);
360 // Is s1 immediately before s2 in memory?
361 bool are_adjacent_refs(Node* s1, Node* s2);
362 // Are s1 and s2 similar?
363 bool isomorphic(Node* s1, Node* s2);
364 // Is there no data path from s1 to s2 or s2 to s1?
365 bool independent(Node* s1, Node* s2);
454 char* blank(uint depth);
455
456 void packset_sort(int n);
457 };
458
459
460
461 //------------------------------SWPointer---------------------------
462 // Information about an address for dependence checking and vector alignment
463 class SWPointer VALUE_OBJ_CLASS_SPEC {
464 protected:
465 MemNode* _mem; // My memory reference node
466 SuperWord* _slp; // SuperWord class
467
468 Node* _base; // NULL if unsafe nonheap reference
469 Node* _adr; // address pointer
470 jint _scale; // multiplier for iv (in bytes), 0 if no loop iv
471 jint _offset; // constant offset (in bytes)
472 Node* _invar; // invariant offset (in bytes), NULL if none
473 bool _negate_invar; // if true then use: (0 - _invar)
474
475 PhaseIdealLoop* phase() { return _slp->phase(); }
476 IdealLoopTree* lpt() { return _slp->lpt(); }
477 PhiNode* iv() { return _slp->iv(); } // Induction var
478
479 bool invariant(Node* n) {
480 Node *n_c = phase()->get_ctrl(n);
481 return !lpt()->is_member(phase()->get_loop(n_c));
482 }
483
484 // Match: k*iv + offset
485 bool scaled_iv_plus_offset(Node* n);
486 // Match: k*iv where k is a constant that's not zero
487 bool scaled_iv(Node* n);
488 // Match: offset is (k [+/- invariant])
489 bool offset_plus_k(Node* n, bool negate = false);
490
491 public:
492 enum CMP {
493 Less = 1,
494 Greater = 2,
495 Equal = 4,
496 NotEqual = (Less | Greater),
497 NotComparable = (Less | Greater | Equal)
498 };
499
500 SWPointer(MemNode* mem, SuperWord* slp);
501 // Following is used to create a temporary object during
502 // the pattern match of an address expression.
520 (_adr == q._adr || _base == _adr && q._base == q._adr) &&
521 _scale == q._scale &&
522 _invar == q._invar &&
523 _negate_invar == q._negate_invar) {
524 bool overlap = q._offset < _offset + memory_size() &&
525 _offset < q._offset + q.memory_size();
526 return overlap ? Equal : (_offset < q._offset ? Less : Greater);
527 } else {
528 return NotComparable;
529 }
530 }
531
532 bool not_equal(SWPointer& q) { return not_equal(cmp(q)); }
533 bool equal(SWPointer& q) { return equal(cmp(q)); }
534 bool comparable(SWPointer& q) { return comparable(cmp(q)); }
535 static bool not_equal(int cmp) { return cmp <= NotEqual; }
536 static bool equal(int cmp) { return cmp == Equal; }
537 static bool comparable(int cmp) { return cmp < NotComparable; }
538
539 void print();
540 };
541
542
543 //------------------------------OrderedPair---------------------------
544 // Ordered pair of Node*.
545 class OrderedPair VALUE_OBJ_CLASS_SPEC {
546 protected:
547 Node* _p1;
548 Node* _p2;
549 public:
550 OrderedPair() : _p1(NULL), _p2(NULL) {}
551 OrderedPair(Node* p1, Node* p2) {
552 if (p1->_idx < p2->_idx) {
553 _p1 = p1; _p2 = p2;
554 } else {
555 _p1 = p2; _p2 = p1;
556 }
557 }
558
559 bool operator==(const OrderedPair &rhs) {
|
186
187
188 // ========================= SuperWord =====================
189
190 // -----------------------------SWNodeInfo---------------------------------
191 // Per node info needed by SuperWord
192 class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
193 public:
194 int _alignment; // memory alignment for a node
195 int _depth; // Max expression (DAG) depth from block start
196 const Type* _velt_type; // vector element type
197 Node_List* _my_pack; // pack containing this node
198
199 SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
200 static const SWNodeInfo initial;
201 };
202
203 // -----------------------------SuperWord---------------------------------
204 // Transforms scalar operations into packed (superword) operations.
205 class SuperWord : public ResourceObj {
206 friend SWPointer;
207 private:
208 PhaseIdealLoop* _phase;
209 Arena* _arena;
210 PhaseIterGVN &_igvn;
211
212 enum consts { top_align = -1, bottom_align = -666 };
213
214 GrowableArray<Node_List*> _packset; // Packs for the current block
215
216 GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
217
218 GrowableArray<Node*> _block; // Nodes in current block
219 GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
220 GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
221 GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
222 GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi
223 GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi
224 GrowableArray<SWNodeInfo> _node_info; // Info needed per node
225 CloneMap& _clone_map; // map of nodes created in cloning
226
229 GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
230
231 DepGraph _dg; // Dependence graph
232
233 // Scratch pads
234 VectorSet _visited; // Visited set
235 VectorSet _post_visited; // Post-visited set
236 Node_Stack _n_idx_list; // List of (node,index) pairs
237 GrowableArray<Node*> _nlist; // List of nodes
238 GrowableArray<Node*> _stk; // Stack of nodes
239
240 public:
241 SuperWord(PhaseIdealLoop* phase);
242
243 void transform_loop(IdealLoopTree* lpt);
244
245 // Accessors for SWPointer
246 PhaseIdealLoop* phase() { return _phase; }
247 IdealLoopTree* lpt() { return _lpt; }
248 PhiNode* iv() { return _iv; }
249 #ifndef PRODUCT
250 bool is_debug() { return _vector_loop_debug > 0; }
251 bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; }
252 bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; }
253 bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; }
254 bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; }
255 #endif
256 bool do_vector_loop() { return _do_vector_loop; }
257 private:
258 IdealLoopTree* _lpt; // Current loop tree node
259 LoopNode* _lp; // Current LoopNode
260 Node* _bb; // Current basic block
261 PhiNode* _iv; // Induction var
262 bool _race_possible; // In cases where SDMU is true
263 bool _do_vector_loop; // whether to do vectorization/simd style
264 int _num_work_vecs; // Number of non memory vector operations
265 int _num_reductions; // Number of reduction expressions applied
266 int _ii_first; // generation with direct deps from mem phi
267 int _ii_last; // generation with direct deps to mem phi
268 GrowableArray<int> _ii_order;
269 #ifndef PRODUCT
270 uintx _vector_loop_debug; // provide more printing in debug mode
271 #endif
272
273 // Accessors
274 Arena* arena() { return _arena; }
275
276 Node* bb() { return _bb; }
277 void set_bb(Node* bb) { _bb = bb; }
278
279 void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }
280
281 LoopNode* lp() { return _lp; }
282 void set_lp(LoopNode* lp) { _lp = lp;
283 _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
284 int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
285
286 int vector_width(Node* n) {
287 BasicType bt = velt_basic_type(n);
288 return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
289 }
290 int vector_width_in_bytes(Node* n) {
291 BasicType bt = velt_basic_type(n);
314 void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
315
316 // memory alignment for a node
317 int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
318 void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
319
320 // Max expression (DAG) depth from beginning of the block for each node
321 int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; }
322 void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }
323
324 // vector element type
325 const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
326 BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
327 void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
328 bool same_velt_type(Node* n1, Node* n2);
329
330 // my_pack
331 Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
332 void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
333
334 // CloneMap utilities
335 bool same_origin_idx(Node* a, Node* b) const;
336 bool same_generation(Node* a, Node* b) const;
337
338 // methods
339
340 // Extract the superword level parallelism
341 void SLP_extract();
342 // Find the adjacent memory references and create pack pairs for them.
343 void find_adjacent_refs();
344 // Find a memory reference to align the loop induction variable to.
345 MemNode* find_align_to_ref(Node_List &memops);
346 // Calculate loop's iv adjustment for this memory ops.
347 int get_iv_adjustment(MemNode* mem);
348 // Can the preloop align the reference to position zero in the vector?
349 bool ref_is_alignable(SWPointer& p);
350 // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only)
351 bool hoist_loads_in_graph();
352 // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi
353 // Return false if failed
354 Node* find_phi_for_mem_dep(LoadNode* ld);
355 // Return same node but from the first generation. Return 0, if not found
356 Node* first_node(Node* nd);
357 // Return same node as this but from the last generation. Return 0, if not found
358 Node* last_node(Node* n);
359 // Mark nodes belonging to first and last generation
360 // returns first generation index or -1 if vectorization/simd is impossible
361 int mark_generations();
362 // swapping inputs of commutative instruction (Add or Mul)
363 bool fix_commutative_inputs(Node* gold, Node* fix);
364 // make packs forcefully (in _do_vector_loop only)
365 bool pack_parallel();
366 // Construct dependency graph.
367 void dependence_graph();
368 // Return a memory slice (node list) in predecessor order starting at "start"
369 void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
370 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
371 bool stmts_can_pack(Node* s1, Node* s2, int align);
372 // Does s exist in a pack at position pos?
373 bool exists_at(Node* s, uint pos);
374 // Is s1 immediately before s2 in memory?
375 bool are_adjacent_refs(Node* s1, Node* s2);
376 // Are s1 and s2 similar?
377 bool isomorphic(Node* s1, Node* s2);
378 // Is there no data path from s1 to s2 or s2 to s1?
379 bool independent(Node* s1, Node* s2);
468 char* blank(uint depth);
469
470 void packset_sort(int n);
471 };
472
473
474
475 //------------------------------SWPointer---------------------------
476 // Information about an address for dependence checking and vector alignment
477 class SWPointer VALUE_OBJ_CLASS_SPEC {
478 protected:
479 MemNode* _mem; // My memory reference node
480 SuperWord* _slp; // SuperWord class
481
482 Node* _base; // NULL if unsafe nonheap reference
483 Node* _adr; // address pointer
484 jint _scale; // multiplier for iv (in bytes), 0 if no loop iv
485 jint _offset; // constant offset (in bytes)
486 Node* _invar; // invariant offset (in bytes), NULL if none
487 bool _negate_invar; // if true then use: (0 - _invar)
488 #ifndef PRODUCT
489 static int _depth; // depth of the current syntax clause in the syntax expression (for debug only)
490 #endif
491 PhaseIdealLoop* phase() { return _slp->phase(); }
492 IdealLoopTree* lpt() { return _slp->lpt(); }
493 PhiNode* iv() { return _slp->iv(); } // Induction var
494
495 bool invariant(Node* n);
496
497 // Match: k*iv + offset
498 bool scaled_iv_plus_offset(Node* n);
499 // Match: k*iv where k is a constant that's not zero
500 bool scaled_iv(Node* n);
501 // Match: offset is (k [+/- invariant])
502 bool offset_plus_k(Node* n, bool negate = false);
503
504 public:
505 enum CMP {
506 Less = 1,
507 Greater = 2,
508 Equal = 4,
509 NotEqual = (Less | Greater),
510 NotComparable = (Less | Greater | Equal)
511 };
512
513 SWPointer(MemNode* mem, SuperWord* slp);
514 // Following is used to create a temporary object during
515 // the pattern match of an address expression.
533 (_adr == q._adr || _base == _adr && q._base == q._adr) &&
534 _scale == q._scale &&
535 _invar == q._invar &&
536 _negate_invar == q._negate_invar) {
537 bool overlap = q._offset < _offset + memory_size() &&
538 _offset < q._offset + q.memory_size();
539 return overlap ? Equal : (_offset < q._offset ? Less : Greater);
540 } else {
541 return NotComparable;
542 }
543 }
544
545 bool not_equal(SWPointer& q) { return not_equal(cmp(q)); }
546 bool equal(SWPointer& q) { return equal(cmp(q)); }
547 bool comparable(SWPointer& q) { return comparable(cmp(q)); }
548 static bool not_equal(int cmp) { return cmp <= NotEqual; }
549 static bool equal(int cmp) { return cmp == Equal; }
550 static bool comparable(int cmp) { return cmp < NotComparable; }
551
552 void print();
553 #ifndef PRODUCT
554 void print_depth();
555 int depth() { return _depth; }
556 void set_depth(int d) { _depth = d; }
557 void inc_depth() { _depth++;}
558 void dec_depth() { if (_depth > 0) _depth--;}
559 #endif
560
561 class Depth {
562 friend SWPointer;
563 #ifndef PRODUCT
564 Depth() { ++_depth; }
565 ~Depth() { if (_depth > 0) _depth--;}
566 #endif
567 };
568 };
569
570
571 //------------------------------OrderedPair---------------------------
572 // Ordered pair of Node*.
573 class OrderedPair VALUE_OBJ_CLASS_SPEC {
574 protected:
575 Node* _p1;
576 Node* _p2;
577 public:
578 OrderedPair() : _p1(NULL), _p2(NULL) {}
579 OrderedPair(Node* p1, Node* p2) {
580 if (p1->_idx < p2->_idx) {
581 _p1 = p1; _p2 = p2;
582 } else {
583 _p1 = p2; _p2 = p1;
584 }
585 }
586
587 bool operator==(const OrderedPair &rhs) {
|