49 _arena(phase->C->comp_arena()),
50 _packset(arena(), 8, 0, NULL), // packs for the current block
51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
52 _block(arena(), 8, 0, NULL), // nodes in current block
53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
57 _align_to_ref(NULL), // memory reference to align vectors to
58 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
59 _dg(_arena), // dependence graph
60 _visited(arena()), // visited node set
61 _post_visited(arena()), // post visited node set
62 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
63 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
64 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
65 _lpt(NULL), // loop tree node
66 _lp(NULL), // LoopNode
67 _bb(NULL), // basic block
68 _iv(NULL), // induction var
69 _race_possible(false) // cases where SDMU is true
70 {}
71
72 //------------------------------transform_loop---------------------------
73 void SuperWord::transform_loop(IdealLoopTree* lpt) {
74 assert(UseSuperWord, "should be");
75 // Do vectors exist on this architecture?
76 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
77
78 assert(lpt->_head->is_CountedLoop(), "must be");
79 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
80
81 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
82
83 if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
84
85 // Check for no control flow in body (other than exit)
86 Node *cl_exit = cl->loopexit();
87 if (cl_exit->in(0) != lpt->_head) return;
88
89 // Make sure the are no extra control users of the loop backedge
90 if (cl->back_control()->outcnt() != 1) {
91 return;
92 }
93
94 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
95 CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
96 if (pre_end == NULL) return;
97 Node *pre_opaq1 = pre_end->limit();
98 if (pre_opaq1->Opcode() != Op_Opaque1) return;
99
100 init(); // initialize data structures
101
102 set_lpt(lpt);
103 set_lp(cl);
104
105 // For now, define one block which is the entire loop body
106 set_bb(cl);
107
108 assert(_packset.length() == 0, "packset must be empty");
109 SLP_extract();
110 }
111
112 //------------------------------SLP_extract---------------------------
113 // Extract the superword level parallelism
114 //
115 // 1) A reverse post-order of nodes in the block is constructed. By scanning
116 // this list from first to last, all definitions are visited before their uses.
117 //
118 // 2) A point-to-point dependence graph is constructed between memory references.
// This simplifies the upcoming "independence" checker.
120 //
121 // 3) The maximum depth in the node graph from the beginning of the block
122 // to each node is computed. This is used to prune the graph search
123 // in the independence checker.
124 //
125 // 4) For integer types, the necessary bit width is propagated backwards
126 // from stores to allow packed operations on byte, char, and short
127 // integers. This reverses the promotion to type "int" that javac
128 // did for operations like: char c1,c2,c3; c1 = c2 + c3.
129 //
193
194 Node_List align_to_refs;
195 int best_iv_adjustment = 0;
196 MemNode* best_align_to_mem_ref = NULL;
197
198 while (memops.size() != 0) {
199 // Find a memory reference to align to.
200 MemNode* mem_ref = find_align_to_ref(memops);
201 if (mem_ref == NULL) break;
202 align_to_refs.push(mem_ref);
203 int iv_adjustment = get_iv_adjustment(mem_ref);
204
205 if (best_align_to_mem_ref == NULL) {
206 // Set memory reference which is the best from all memory operations
207 // to be used for alignment. The pre-loop trip count is modified to align
208 // this reference to a vector-aligned address.
209 best_align_to_mem_ref = mem_ref;
210 best_iv_adjustment = iv_adjustment;
211 }
212
213 SWPointer align_to_ref_p(mem_ref, this);
214 // Set alignment relative to "align_to_ref" for all related memory operations.
215 for (int i = memops.size() - 1; i >= 0; i--) {
216 MemNode* s = memops.at(i)->as_Mem();
217 if (isomorphic(s, mem_ref)) {
218 SWPointer p2(s, this);
219 if (p2.comparable(align_to_ref_p)) {
220 int align = memory_alignment(s, iv_adjustment);
221 set_alignment(s, align);
222 }
223 }
224 }
225
226 // Create initial pack pairs of memory operations for which
227 // alignment is set and vectors will be aligned.
228 bool create_pack = true;
229 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
230 if (!Matcher::misaligned_vectors_ok()) {
231 int vw = vector_width(mem_ref);
232 int vw_best = vector_width(best_align_to_mem_ref);
233 if (vw > vw_best) {
234 // Do not vectorize a memory access with more elements per vector
235 // if unaligned memory access is not allowed because number of
236 // iterations in pre-loop will be not enough to align it.
237 create_pack = false;
238 }
327 set_align_to_ref(best_align_to_mem_ref);
328
329 #ifndef PRODUCT
330 if (TraceSuperWord) {
331 tty->print_cr("\nAfter find_adjacent_refs");
332 print_packset();
333 }
334 #endif
335 }
336
337 //------------------------------find_align_to_ref---------------------------
338 // Find a memory reference to align the loop induction variable to.
339 // Looks first at stores then at loads, looking for a memory reference
340 // with the largest number of references similar to it.
341 MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
342 GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
343
344 // Count number of comparable memory ops
345 for (uint i = 0; i < memops.size(); i++) {
346 MemNode* s1 = memops.at(i)->as_Mem();
347 SWPointer p1(s1, this);
348 // Discard if pre loop can't align this reference
349 if (!ref_is_alignable(p1)) {
350 *cmp_ct.adr_at(i) = 0;
351 continue;
352 }
353 for (uint j = i+1; j < memops.size(); j++) {
354 MemNode* s2 = memops.at(j)->as_Mem();
355 if (isomorphic(s1, s2)) {
356 SWPointer p2(s2, this);
357 if (p1.comparable(p2)) {
358 (*cmp_ct.adr_at(i))++;
359 (*cmp_ct.adr_at(j))++;
360 }
361 }
362 }
363 }
364
365 // Find Store (or Load) with the greatest number of "comparable" references,
366 // biggest vector size, smallest data size and smallest iv offset.
367 int max_ct = 0;
368 int max_vw = 0;
369 int max_idx = -1;
370 int min_size = max_jint;
371 int min_iv_offset = max_jint;
372 for (uint j = 0; j < memops.size(); j++) {
373 MemNode* s = memops.at(j)->as_Mem();
374 if (s->is_Store()) {
375 int vw = vector_width_in_bytes(s);
376 assert(vw > 1, "sanity");
377 SWPointer p(s, this);
378 if (cmp_ct.at(j) > max_ct ||
379 cmp_ct.at(j) == max_ct &&
380 (vw > max_vw ||
381 vw == max_vw &&
382 (data_size(s) < min_size ||
383 data_size(s) == min_size &&
384 (p.offset_in_bytes() < min_iv_offset)))) {
385 max_ct = cmp_ct.at(j);
386 max_vw = vw;
387 max_idx = j;
388 min_size = data_size(s);
389 min_iv_offset = p.offset_in_bytes();
390 }
391 }
392 }
393 // If no stores, look at loads
394 if (max_ct == 0) {
395 for (uint j = 0; j < memops.size(); j++) {
396 MemNode* s = memops.at(j)->as_Mem();
397 if (s->is_Load()) {
398 int vw = vector_width_in_bytes(s);
399 assert(vw > 1, "sanity");
400 SWPointer p(s, this);
401 if (cmp_ct.at(j) > max_ct ||
402 cmp_ct.at(j) == max_ct &&
403 (vw > max_vw ||
404 vw == max_vw &&
405 (data_size(s) < min_size ||
406 data_size(s) == min_size &&
407 (p.offset_in_bytes() < min_iv_offset)))) {
408 max_ct = cmp_ct.at(j);
409 max_vw = vw;
410 max_idx = j;
411 min_size = data_size(s);
412 min_iv_offset = p.offset_in_bytes();
413 }
414 }
415 }
416 }
417
418 #ifdef ASSERT
419 if (TraceSuperWord && Verbose) {
420 tty->print_cr("\nVector memops after find_align_to_refs");
465 if (init_nd->is_Con() && p.invar() == NULL) {
466 int init = init_nd->bottom_type()->is_int()->get_con();
467
468 int init_offset = init * p.scale_in_bytes() + offset;
469 assert(init_offset >= 0, "positive offset from object start");
470
471 if (span > 0) {
472 return (vw - (init_offset % vw)) % span == 0;
473 } else {
474 assert(span < 0, "nonzero stride * scale");
475 return (init_offset % vw) % -span == 0;
476 }
477 }
478 }
479 return false;
480 }
481
482 //---------------------------get_iv_adjustment---------------------------
483 // Calculate loop's iv adjustment for this memory ops.
484 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
485 SWPointer align_to_ref_p(mem_ref, this);
486 int offset = align_to_ref_p.offset_in_bytes();
487 int scale = align_to_ref_p.scale_in_bytes();
488 int vw = vector_width_in_bytes(mem_ref);
489 assert(vw > 1, "sanity");
490 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
491 // At least one iteration is executed in pre-loop by default. As result
492 // several iterations are needed to align memory operations in main-loop even
493 // if offset is 0.
494 int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
495 int elt_size = align_to_ref_p.memory_size();
496 assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
497 err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
498 int iv_adjustment = iv_adjustment_in_bytes/elt_size;
499
500 #ifndef PRODUCT
501 if (TraceSuperWord)
502 tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
503 offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
504 #endif
505 return iv_adjustment;
525
526 // Get slice in predecessor order (last is first)
527 mem_slice_preds(n_tail, n, _nlist);
528
529 // Make the slice dependent on the root
530 DepMem* slice = _dg.dep(n);
531 _dg.make_edge(_dg.root(), slice);
532
533 // Create a sink for the slice
534 DepMem* slice_sink = _dg.make_node(NULL);
535 _dg.make_edge(slice_sink, _dg.tail());
536
537 // Now visit each pair of memory ops, creating the edges
538 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
539 Node* s1 = _nlist.at(j);
540
541 // If no dependency yet, use slice
542 if (_dg.dep(s1)->in_cnt() == 0) {
543 _dg.make_edge(slice, s1);
544 }
545 SWPointer p1(s1->as_Mem(), this);
546 bool sink_dependent = true;
547 for (int k = j - 1; k >= 0; k--) {
548 Node* s2 = _nlist.at(k);
549 if (s1->is_Load() && s2->is_Load())
550 continue;
551 SWPointer p2(s2->as_Mem(), this);
552
553 int cmp = p1.cmp(p2);
554 if (SuperWordRTDepCheck &&
555 p1.base() != p2.base() && p1.valid() && p2.valid()) {
556 // Create a runtime check to disambiguate
557 OrderedPair pp(p1.base(), p2.base());
558 _disjoint_ptrs.append_if_missing(pp);
559 } else if (!SWPointer::not_equal(cmp)) {
560 // Possibly same address
561 _dg.make_edge(s1, s2);
562 sink_dependent = false;
563 }
564 }
565 if (sink_dependent) {
566 _dg.make_edge(s1, slice_sink);
567 }
568 }
569 #ifndef PRODUCT
570 if (TraceSuperWord) {
571 tty->print_cr("\nDependence graph for slice: %d", n->_idx);
671 return false;
672 }
673
674 //------------------------------are_adjacent_refs---------------------------
675 // Is s1 immediately before s2 in memory?
676 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
677 if (!s1->is_Mem() || !s2->is_Mem()) return false;
678 if (!in_bb(s1) || !in_bb(s2)) return false;
679
680 // Do not use superword for non-primitives
681 if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
682 !is_java_primitive(s2->as_Mem()->memory_type())) {
683 return false;
684 }
685
686 // FIXME - co_locate_pack fails on Stores in different mem-slices, so
687 // only pack memops that are in the same alias set until that's fixed.
688 if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
689 _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
690 return false;
691 SWPointer p1(s1->as_Mem(), this);
692 SWPointer p2(s2->as_Mem(), this);
693 if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
694 int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
695 return diff == data_size(s1);
696 }
697
698 //------------------------------isomorphic---------------------------
699 // Are s1 and s2 similar?
700 bool SuperWord::isomorphic(Node* s1, Node* s2) {
701 if (s1->Opcode() != s2->Opcode()) return false;
702 if (s1->req() != s2->req()) return false;
703 if (s1->in(0) != s2->in(0)) return false;
704 if (!same_velt_type(s1, s2)) return false;
705 return true;
706 }
707
708 //------------------------------independent---------------------------
709 // Is there no data path from s1 to s2 or s2 to s1?
710 bool SuperWord::independent(Node* s1, Node* s2) {
711 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
712 int d1 = depth(s1);
1480 // Insert extract (unpack) operations for scalar uses
1481 for (int i = 0; i < _packset.length(); i++) {
1482 insert_extracts(_packset.at(i));
1483 }
1484
1485 Compile* C = _phase->C;
1486 uint max_vlen_in_bytes = 0;
1487 for (int i = 0; i < _block.length(); i++) {
1488 Node* n = _block.at(i);
1489 Node_List* p = my_pack(n);
1490 if (p && n == executed_last(p)) {
1491 uint vlen = p->size();
1492 uint vlen_in_bytes = 0;
1493 Node* vn = NULL;
1494 Node* low_adr = p->at(0);
1495 Node* first = executed_first(p);
1496 int opc = n->Opcode();
1497 if (n->is_Load()) {
1498 Node* ctl = n->in(MemNode::Control);
1499 Node* mem = first->in(MemNode::Memory);
1500 SWPointer p1(n->as_Mem(), this);
1501 // Identify the memory dependency for the new loadVector node by
1502 // walking up through memory chain.
1503 // This is done to give flexibility to the new loadVector node so that
1504 // it can move above independent storeVector nodes.
1505 while (mem->is_StoreVector()) {
1506 SWPointer p2(mem->as_Mem(), this);
1507 int cmp = p1.cmp(p2);
1508 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1509 mem = mem->in(MemNode::Memory);
1510 } else {
1511 break; // dependent memory
1512 }
1513 }
1514 Node* adr = low_adr->in(MemNode::Address);
1515 const TypePtr* atyp = n->adr_type();
1516 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1517 vlen_in_bytes = vn->as_LoadVector()->memory_size();
1518 } else if (n->is_Store()) {
1519 // Promote value to be stored to vector
1520 Node* val = vector_opd(p, MemNode::ValueIn);
1521 Node* ctl = n->in(MemNode::Control);
1522 Node* mem = first->in(MemNode::Memory);
1523 Node* adr = low_adr->in(MemNode::Address);
1524 const TypePtr* atyp = n->adr_type();
1525 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1526 vlen_in_bytes = vn->as_StoreVector()->memory_size();
2003 }
2004 }
2005 }
2006 }
2007 }
2008 #ifndef PRODUCT
2009 if (TraceSuperWord && Verbose) {
2010 for (int i = 0; i < _block.length(); i++) {
2011 Node* n = _block.at(i);
2012 velt_type(n)->dump();
2013 tty->print("\t");
2014 n->dump();
2015 }
2016 }
2017 #endif
2018 }
2019
2020 //------------------------------memory_alignment---------------------------
2021 // Alignment within a vector memory reference
2022 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
2023 SWPointer p(s, this);
2024 if (!p.valid()) {
2025 return bottom_align;
2026 }
2027 int vw = vector_width_in_bytes(s);
2028 if (vw < 2) {
2029 return bottom_align; // No vectors for this type
2030 }
2031 int offset = p.offset_in_bytes();
2032 offset += iv_adjust*p.memory_size();
2033 int off_rem = offset % vw;
2034 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
2035 return off_mod;
2036 }
2037
2038 //---------------------------container_type---------------------------
2039 // Smallest type containing range of values
2040 const Type* SuperWord::container_type(Node* n) {
2041 if (n->is_Mem()) {
2042 BasicType bt = n->as_Mem()->memory_type();
2043 if (n->is_Store() && (bt == T_CHAR)) {
2167 // to align_to_ref will be a position zero in the vector.
2168 // (iv + k) mod vector_align == 0
2169 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
2170 CountedLoopNode *main_head = lp()->as_CountedLoop();
2171 assert(main_head->is_main_loop(), "");
2172 CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
2173 assert(pre_end != NULL, "we must have a correct pre-loop");
2174 Node *pre_opaq1 = pre_end->limit();
2175 assert(pre_opaq1->Opcode() == Op_Opaque1, "");
2176 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2177 Node *lim0 = pre_opaq->in(1);
2178
2179 // Where we put new limit calculations
2180 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2181
2182 // Ensure the original loop limit is available from the
2183 // pre-loop Opaque1 node.
2184 Node *orig_limit = pre_opaq->original_loop_limit();
2185 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
2186
2187 SWPointer align_to_ref_p(align_to_ref, this);
2188 assert(align_to_ref_p.valid(), "sanity");
2189
2190 // Given:
2191 // lim0 == original pre loop limit
2192 // V == v_align (power of 2)
2193 // invar == extra invariant piece of the address expression
2194 // e == offset [ +/- invar ]
2195 //
2196 // When reassociating expressions involving '%' the basic rules are:
2197 // (a - b) % k == 0 => a % k == b % k
2198 // and:
2199 // (a + b) % k == 0 => a % k == (k - b) % k
2200 //
2201 // For stride > 0 && scale > 0,
2202 // Derive the new pre-loop limit "lim" such that the two constraints:
2203 // (1) lim = lim0 + N (where N is some positive integer < V)
2204 // (2) (e + lim) % V == 0
2205 // are true.
2206 //
2207 // Substituting (1) into (2),
2338 if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2339 return pre_end;
2340 }
2341
2342
2343 //------------------------------init---------------------------
2344 void SuperWord::init() {
2345 _dg.init();
2346 _packset.clear();
2347 _disjoint_ptrs.clear();
2348 _block.clear();
2349 _data_entry.clear();
2350 _mem_slice_head.clear();
2351 _mem_slice_tail.clear();
2352 _node_info.clear();
2353 _align_to_ref = NULL;
2354 _lpt = NULL;
2355 _lp = NULL;
2356 _bb = NULL;
2357 _iv = NULL;
2358 }
2359
2360 //------------------------------print_packset---------------------------
2361 void SuperWord::print_packset() {
2362 #ifndef PRODUCT
2363 tty->print_cr("packset");
2364 for (int i = 0; i < _packset.length(); i++) {
2365 tty->print_cr("Pack: %d", i);
2366 Node_List* p = _packset.at(i);
2367 print_pack(p);
2368 }
2369 #endif
2370 }
2371
2372 //------------------------------print_pack---------------------------
2373 void SuperWord::print_pack(Node_List* p) {
2374 for (uint i = 0; i < p->size(); i++) {
2375 print_stmt(p->at(i));
2376 }
2377 }
2394 void SuperWord::print_stmt(Node* s) {
2395 #ifndef PRODUCT
2396 tty->print(" align: %d \t", alignment(s));
2397 s->dump();
2398 #endif
2399 }
2400
2401 //------------------------------blank---------------------------
2402 char* SuperWord::blank(uint depth) {
2403 static char blanks[101];
2404 assert(depth < 101, "too deep");
2405 for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2406 blanks[depth] = '\0';
2407 return blanks;
2408 }
2409
2410
2411 //==============================SWPointer===========================
2412
//----------------------------SWPointer------------------------
// Decompose the address expression of a memory node into the form
// base + invariant + scale*iv + offset. On any pattern-match failure
// the fields keep their default (invalid) values -- see valid().
SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
  _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // Unsafe reference could not be aligned appropriately without runtime checking.
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }
  // Walk at most three levels of AddP, folding each level's Offset input
  // into the scaled-iv / invariant / constant-offset accumulators.
  for (int i = 0; i < 3; i++) {
    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    if (base == adr || !adr->is_AddP()) {
      break; // stop looking at addp's
    }
  }
  // Record the matched base and innermost address node; the pointer is
  // now considered valid.
  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
}
2444
// Following is used to create a temporary object during
// the pattern match of an address expression. Only the memory node and
// the SuperWord context are copied; all match state (_base, _adr,
// _scale, _offset, _invar) starts out empty/invalid.
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}
2450
2451 //------------------------scaled_iv_plus_offset--------------------
2452 // Match: k*iv + offset
2453 // where: k is a constant that maybe zero, and
2454 // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
2455 bool SWPointer::scaled_iv_plus_offset(Node* n) {
2456 if (scaled_iv(n)) {
2457 return true;
2458 }
2459 if (offset_plus_k(n)) {
2460 return true;
2461 }
2462 int opc = n->Opcode();
2463 if (opc == Op_AddI) {
2464 if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
2465 return true;
2466 }
2467 if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2468 return true;
2469 }
2472 return true;
2473 }
2474 if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2475 _scale *= -1;
2476 return true;
2477 }
2478 }
2479 return false;
2480 }
2481
2482 //----------------------------scaled_iv------------------------
2483 // Match: k*iv where k is a constant that's not zero
2484 bool SWPointer::scaled_iv(Node* n) {
2485 if (_scale != 0) {
2486 return false; // already found a scale
2487 }
2488 if (n == iv()) {
2489 _scale = 1;
2490 return true;
2491 }
2492 int opc = n->Opcode();
2493 if (opc == Op_MulI) {
2494 if (n->in(1) == iv() && n->in(2)->is_Con()) {
2495 _scale = n->in(2)->get_int();
2496 return true;
2497 } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
2498 _scale = n->in(1)->get_int();
2499 return true;
2500 }
2501 } else if (opc == Op_LShiftI) {
2502 if (n->in(1) == iv() && n->in(2)->is_Con()) {
2503 _scale = 1 << n->in(2)->get_int();
2504 return true;
2505 }
2506 } else if (opc == Op_ConvI2L) {
2507 if (scaled_iv_plus_offset(n->in(1))) {
2508 return true;
2509 }
2510 } else if (opc == Op_LShiftL) {
2511 if (!has_iv() && _invar == NULL) {
2529 //----------------------------offset_plus_k------------------------
2530 // Match: offset is (k [+/- invariant])
2531 // where k maybe zero and invariant is optional, but not both.
2532 bool SWPointer::offset_plus_k(Node* n, bool negate) {
2533 int opc = n->Opcode();
2534 if (opc == Op_ConI) {
2535 _offset += negate ? -(n->get_int()) : n->get_int();
2536 return true;
2537 } else if (opc == Op_ConL) {
2538 // Okay if value fits into an int
2539 const TypeLong* t = n->find_long_type();
2540 if (t->higher_equal(TypeLong::INT)) {
2541 jlong loff = n->get_long();
2542 jint off = (jint)loff;
2543 _offset += negate ? -off : loff;
2544 return true;
2545 }
2546 return false;
2547 }
2548 if (_invar != NULL) return false; // already have an invariant
2549 if (opc == Op_AddI) {
2550 if (n->in(2)->is_Con() && invariant(n->in(1))) {
2551 _negate_invar = negate;
2552 _invar = n->in(1);
2553 _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2554 return true;
2555 } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
2556 _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
2557 _negate_invar = negate;
2558 _invar = n->in(2);
2559 return true;
2560 }
2561 }
2562 if (opc == Op_SubI) {
2563 if (n->in(2)->is_Con() && invariant(n->in(1))) {
2564 _negate_invar = negate;
2565 _invar = n->in(1);
2566 _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
2567 return true;
2568 } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|
49 _arena(phase->C->comp_arena()),
50 _packset(arena(), 8, 0, NULL), // packs for the current block
51 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
52 _block(arena(), 8, 0, NULL), // nodes in current block
53 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
54 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
55 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
56 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
57 _align_to_ref(NULL), // memory reference to align vectors to
58 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
59 _dg(_arena), // dependence graph
60 _visited(arena()), // visited node set
61 _post_visited(arena()), // post visited node set
62 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
63 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
64 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
65 _lpt(NULL), // loop tree node
66 _lp(NULL), // LoopNode
67 _bb(NULL), // basic block
68 _iv(NULL), // induction var
69 _race_possible(false), // cases where SDMU is true
70 _early_return(true)
71 {}
72
//------------------------------transform_loop---------------------------
// Analyze the given counted loop and, when do_optimization is true,
// run SLP pack extraction on it. Returns early whenever the loop shape
// is not suitable for vectorization.
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
  assert(UseSuperWord, "should be");
  // Do vectors exist on this architecture?
  if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;

  assert(lpt->_head->is_CountedLoop(), "must be");
  CountedLoopNode *cl = lpt->_head->as_CountedLoop();

  if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop

  if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops

  // Check for no control flow in body (other than exit)
  Node *cl_exit = cl->loopexit();
  if (cl_exit->in(0) != lpt->_head) return;

  // Make sure there are no extra control users of the loop backedge
  if (cl->back_control()->outcnt() != 1) {
    return;
  }

  // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
  CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
  if (pre_end == NULL) return;
  Node *pre_opaq1 = pre_end->limit();
  if (pre_opaq1->Opcode() != Op_Opaque1) return;

  init(); // initialize data structures

  set_lpt(lpt);
  set_lp(cl);

  // For now, define one block which is the entire loop body
  set_bb(cl);

  // Only extract packs when the caller asked for optimization; otherwise
  // just the analysis setup above is performed.
  if (do_optimization) {
    assert(_packset.length() == 0, "packset must be empty");
    SLP_extract();
  }
}
114
115 //------------------------------SLP_extract---------------------------
116 // Extract the superword level parallelism
117 //
118 // 1) A reverse post-order of nodes in the block is constructed. By scanning
119 // this list from first to last, all definitions are visited before their uses.
120 //
121 // 2) A point-to-point dependence graph is constructed between memory references.
// This simplifies the upcoming "independence" checker.
123 //
124 // 3) The maximum depth in the node graph from the beginning of the block
125 // to each node is computed. This is used to prune the graph search
126 // in the independence checker.
127 //
128 // 4) For integer types, the necessary bit width is propagated backwards
129 // from stores to allow packed operations on byte, char, and short
130 // integers. This reverses the promotion to type "int" that javac
131 // did for operations like: char c1,c2,c3; c1 = c2 + c3.
132 //
196
197 Node_List align_to_refs;
198 int best_iv_adjustment = 0;
199 MemNode* best_align_to_mem_ref = NULL;
200
201 while (memops.size() != 0) {
202 // Find a memory reference to align to.
203 MemNode* mem_ref = find_align_to_ref(memops);
204 if (mem_ref == NULL) break;
205 align_to_refs.push(mem_ref);
206 int iv_adjustment = get_iv_adjustment(mem_ref);
207
208 if (best_align_to_mem_ref == NULL) {
209 // Set memory reference which is the best from all memory operations
210 // to be used for alignment. The pre-loop trip count is modified to align
211 // this reference to a vector-aligned address.
212 best_align_to_mem_ref = mem_ref;
213 best_iv_adjustment = iv_adjustment;
214 }
215
216 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
217 // Set alignment relative to "align_to_ref" for all related memory operations.
218 for (int i = memops.size() - 1; i >= 0; i--) {
219 MemNode* s = memops.at(i)->as_Mem();
220 if (isomorphic(s, mem_ref)) {
221 SWPointer p2(s, this, NULL, false);
222 if (p2.comparable(align_to_ref_p)) {
223 int align = memory_alignment(s, iv_adjustment);
224 set_alignment(s, align);
225 }
226 }
227 }
228
229 // Create initial pack pairs of memory operations for which
230 // alignment is set and vectors will be aligned.
231 bool create_pack = true;
232 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
233 if (!Matcher::misaligned_vectors_ok()) {
234 int vw = vector_width(mem_ref);
235 int vw_best = vector_width(best_align_to_mem_ref);
236 if (vw > vw_best) {
237 // Do not vectorize a memory access with more elements per vector
238 // if unaligned memory access is not allowed because number of
239 // iterations in pre-loop will be not enough to align it.
240 create_pack = false;
241 }
330 set_align_to_ref(best_align_to_mem_ref);
331
332 #ifndef PRODUCT
333 if (TraceSuperWord) {
334 tty->print_cr("\nAfter find_adjacent_refs");
335 print_packset();
336 }
337 #endif
338 }
339
340 //------------------------------find_align_to_ref---------------------------
341 // Find a memory reference to align the loop induction variable to.
342 // Looks first at stores then at loads, looking for a memory reference
343 // with the largest number of references similar to it.
344 MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
345 GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
346
347 // Count number of comparable memory ops
348 for (uint i = 0; i < memops.size(); i++) {
349 MemNode* s1 = memops.at(i)->as_Mem();
350 SWPointer p1(s1, this, NULL, false);
351 // Discard if pre loop can't align this reference
352 if (!ref_is_alignable(p1)) {
353 *cmp_ct.adr_at(i) = 0;
354 continue;
355 }
356 for (uint j = i+1; j < memops.size(); j++) {
357 MemNode* s2 = memops.at(j)->as_Mem();
358 if (isomorphic(s1, s2)) {
359 SWPointer p2(s2, this, NULL, false);
360 if (p1.comparable(p2)) {
361 (*cmp_ct.adr_at(i))++;
362 (*cmp_ct.adr_at(j))++;
363 }
364 }
365 }
366 }
367
368 // Find Store (or Load) with the greatest number of "comparable" references,
369 // biggest vector size, smallest data size and smallest iv offset.
370 int max_ct = 0;
371 int max_vw = 0;
372 int max_idx = -1;
373 int min_size = max_jint;
374 int min_iv_offset = max_jint;
375 for (uint j = 0; j < memops.size(); j++) {
376 MemNode* s = memops.at(j)->as_Mem();
377 if (s->is_Store()) {
378 int vw = vector_width_in_bytes(s);
379 assert(vw > 1, "sanity");
380 SWPointer p(s, this, NULL, false);
381 if (cmp_ct.at(j) > max_ct ||
382 cmp_ct.at(j) == max_ct &&
383 (vw > max_vw ||
384 vw == max_vw &&
385 (data_size(s) < min_size ||
386 data_size(s) == min_size &&
387 (p.offset_in_bytes() < min_iv_offset)))) {
388 max_ct = cmp_ct.at(j);
389 max_vw = vw;
390 max_idx = j;
391 min_size = data_size(s);
392 min_iv_offset = p.offset_in_bytes();
393 }
394 }
395 }
396 // If no stores, look at loads
397 if (max_ct == 0) {
398 for (uint j = 0; j < memops.size(); j++) {
399 MemNode* s = memops.at(j)->as_Mem();
400 if (s->is_Load()) {
401 int vw = vector_width_in_bytes(s);
402 assert(vw > 1, "sanity");
403 SWPointer p(s, this, NULL, false);
404 if (cmp_ct.at(j) > max_ct ||
405 cmp_ct.at(j) == max_ct &&
406 (vw > max_vw ||
407 vw == max_vw &&
408 (data_size(s) < min_size ||
409 data_size(s) == min_size &&
410 (p.offset_in_bytes() < min_iv_offset)))) {
411 max_ct = cmp_ct.at(j);
412 max_vw = vw;
413 max_idx = j;
414 min_size = data_size(s);
415 min_iv_offset = p.offset_in_bytes();
416 }
417 }
418 }
419 }
420
421 #ifdef ASSERT
422 if (TraceSuperWord && Verbose) {
423 tty->print_cr("\nVector memops after find_align_to_refs");
468 if (init_nd->is_Con() && p.invar() == NULL) {
469 int init = init_nd->bottom_type()->is_int()->get_con();
470
471 int init_offset = init * p.scale_in_bytes() + offset;
472 assert(init_offset >= 0, "positive offset from object start");
473
474 if (span > 0) {
475 return (vw - (init_offset % vw)) % span == 0;
476 } else {
477 assert(span < 0, "nonzero stride * scale");
478 return (init_offset % vw) % -span == 0;
479 }
480 }
481 }
482 return false;
483 }
484
//---------------------------get_iv_adjustment---------------------------
// Calculate loop's iv adjustment for this memory ops.
// Returns the number of elements (not bytes) by which the induction
// variable must be advanced in the pre-loop so that this memory
// reference becomes aligned to the vector width in the main loop.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
  int offset = align_to_ref_p.offset_in_bytes();
  int scale = align_to_ref_p.scale_in_bytes();
  int vw = vector_width_in_bytes(mem_ref);
  assert(vw > 1, "sanity");
  // Direction the address moves per iteration: +1 if scale and stride agree.
  int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
  // At least one iteration is executed in pre-loop by default. As result
  // several iterations are needed to align memory operations in main-loop even
  // if offset is 0.
  int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
  int elt_size = align_to_ref_p.memory_size();
  // The byte adjustment must be a whole number of elements.
  assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
         err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
  int iv_adjustment = iv_adjustment_in_bytes/elt_size;

#ifndef PRODUCT
  if (TraceSuperWord)
    tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
                  offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
#endif
  return iv_adjustment;
528
529 // Get slice in predecessor order (last is first)
530 mem_slice_preds(n_tail, n, _nlist);
531
532 // Make the slice dependent on the root
533 DepMem* slice = _dg.dep(n);
534 _dg.make_edge(_dg.root(), slice);
535
536 // Create a sink for the slice
537 DepMem* slice_sink = _dg.make_node(NULL);
538 _dg.make_edge(slice_sink, _dg.tail());
539
540 // Now visit each pair of memory ops, creating the edges
541 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
542 Node* s1 = _nlist.at(j);
543
544 // If no dependency yet, use slice
545 if (_dg.dep(s1)->in_cnt() == 0) {
546 _dg.make_edge(slice, s1);
547 }
548 SWPointer p1(s1->as_Mem(), this, NULL, false);
549 bool sink_dependent = true;
550 for (int k = j - 1; k >= 0; k--) {
551 Node* s2 = _nlist.at(k);
552 if (s1->is_Load() && s2->is_Load())
553 continue;
554 SWPointer p2(s2->as_Mem(), this, NULL, false);
555
556 int cmp = p1.cmp(p2);
557 if (SuperWordRTDepCheck &&
558 p1.base() != p2.base() && p1.valid() && p2.valid()) {
559 // Create a runtime check to disambiguate
560 OrderedPair pp(p1.base(), p2.base());
561 _disjoint_ptrs.append_if_missing(pp);
562 } else if (!SWPointer::not_equal(cmp)) {
563 // Possibly same address
564 _dg.make_edge(s1, s2);
565 sink_dependent = false;
566 }
567 }
568 if (sink_dependent) {
569 _dg.make_edge(s1, slice_sink);
570 }
571 }
572 #ifndef PRODUCT
573 if (TraceSuperWord) {
574 tty->print_cr("\nDependence graph for slice: %d", n->_idx);
674 return false;
675 }
676
677 //------------------------------are_adjacent_refs---------------------------
678 // Is s1 immediately before s2 in memory?
679 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
680 if (!s1->is_Mem() || !s2->is_Mem()) return false;
681 if (!in_bb(s1) || !in_bb(s2)) return false;
682
683 // Do not use superword for non-primitives
684 if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
685 !is_java_primitive(s2->as_Mem()->memory_type())) {
686 return false;
687 }
688
689 // FIXME - co_locate_pack fails on Stores in different mem-slices, so
690 // only pack memops that are in the same alias set until that's fixed.
691 if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
692 _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
693 return false;
694 SWPointer p1(s1->as_Mem(), this, NULL, false);
695 SWPointer p2(s2->as_Mem(), this, NULL, false);
696 if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
697 int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
698 return diff == data_size(s1);
699 }
700
701 //------------------------------isomorphic---------------------------
702 // Are s1 and s2 similar?
703 bool SuperWord::isomorphic(Node* s1, Node* s2) {
704 if (s1->Opcode() != s2->Opcode()) return false;
705 if (s1->req() != s2->req()) return false;
706 if (s1->in(0) != s2->in(0)) return false;
707 if (!same_velt_type(s1, s2)) return false;
708 return true;
709 }
710
711 //------------------------------independent---------------------------
712 // Is there no data path from s1 to s2 or s2 to s1?
713 bool SuperWord::independent(Node* s1, Node* s2) {
714 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
715 int d1 = depth(s1);
1483 // Insert extract (unpack) operations for scalar uses
1484 for (int i = 0; i < _packset.length(); i++) {
1485 insert_extracts(_packset.at(i));
1486 }
1487
1488 Compile* C = _phase->C;
1489 uint max_vlen_in_bytes = 0;
1490 for (int i = 0; i < _block.length(); i++) {
1491 Node* n = _block.at(i);
1492 Node_List* p = my_pack(n);
1493 if (p && n == executed_last(p)) {
1494 uint vlen = p->size();
1495 uint vlen_in_bytes = 0;
1496 Node* vn = NULL;
1497 Node* low_adr = p->at(0);
1498 Node* first = executed_first(p);
1499 int opc = n->Opcode();
1500 if (n->is_Load()) {
1501 Node* ctl = n->in(MemNode::Control);
1502 Node* mem = first->in(MemNode::Memory);
1503 SWPointer p1(n->as_Mem(), this, NULL, false);
1504 // Identify the memory dependency for the new loadVector node by
1505 // walking up through memory chain.
1506 // This is done to give flexibility to the new loadVector node so that
1507 // it can move above independent storeVector nodes.
1508 while (mem->is_StoreVector()) {
1509 SWPointer p2(mem->as_Mem(), this, NULL, false);
1510 int cmp = p1.cmp(p2);
1511 if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
1512 mem = mem->in(MemNode::Memory);
1513 } else {
1514 break; // dependent memory
1515 }
1516 }
1517 Node* adr = low_adr->in(MemNode::Address);
1518 const TypePtr* atyp = n->adr_type();
1519 vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
1520 vlen_in_bytes = vn->as_LoadVector()->memory_size();
1521 } else if (n->is_Store()) {
1522 // Promote value to be stored to vector
1523 Node* val = vector_opd(p, MemNode::ValueIn);
1524 Node* ctl = n->in(MemNode::Control);
1525 Node* mem = first->in(MemNode::Memory);
1526 Node* adr = low_adr->in(MemNode::Address);
1527 const TypePtr* atyp = n->adr_type();
1528 vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
1529 vlen_in_bytes = vn->as_StoreVector()->memory_size();
2006 }
2007 }
2008 }
2009 }
2010 }
2011 #ifndef PRODUCT
2012 if (TraceSuperWord && Verbose) {
2013 for (int i = 0; i < _block.length(); i++) {
2014 Node* n = _block.at(i);
2015 velt_type(n)->dump();
2016 tty->print("\t");
2017 n->dump();
2018 }
2019 }
2020 #endif
2021 }
2022
2023 //------------------------------memory_alignment---------------------------
2024 // Alignment within a vector memory reference
2025 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
2026 SWPointer p(s, this, NULL, false);
2027 if (!p.valid()) {
2028 return bottom_align;
2029 }
2030 int vw = vector_width_in_bytes(s);
2031 if (vw < 2) {
2032 return bottom_align; // No vectors for this type
2033 }
2034 int offset = p.offset_in_bytes();
2035 offset += iv_adjust*p.memory_size();
2036 int off_rem = offset % vw;
2037 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
2038 return off_mod;
2039 }
2040
2041 //---------------------------container_type---------------------------
2042 // Smallest type containing range of values
2043 const Type* SuperWord::container_type(Node* n) {
2044 if (n->is_Mem()) {
2045 BasicType bt = n->as_Mem()->memory_type();
2046 if (n->is_Store() && (bt == T_CHAR)) {
2170 // to align_to_ref will be a position zero in the vector.
2171 // (iv + k) mod vector_align == 0
2172 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
2173 CountedLoopNode *main_head = lp()->as_CountedLoop();
2174 assert(main_head->is_main_loop(), "");
2175 CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
2176 assert(pre_end != NULL, "we must have a correct pre-loop");
2177 Node *pre_opaq1 = pre_end->limit();
2178 assert(pre_opaq1->Opcode() == Op_Opaque1, "");
2179 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
2180 Node *lim0 = pre_opaq->in(1);
2181
2182 // Where we put new limit calculations
2183 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2184
2185 // Ensure the original loop limit is available from the
2186 // pre-loop Opaque1 node.
2187 Node *orig_limit = pre_opaq->original_loop_limit();
2188 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
2189
2190 SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
2191 assert(align_to_ref_p.valid(), "sanity");
2192
2193 // Given:
2194 // lim0 == original pre loop limit
2195 // V == v_align (power of 2)
2196 // invar == extra invariant piece of the address expression
2197 // e == offset [ +/- invar ]
2198 //
2199 // When reassociating expressions involving '%' the basic rules are:
2200 // (a - b) % k == 0 => a % k == b % k
2201 // and:
2202 // (a + b) % k == 0 => a % k == (k - b) % k
2203 //
2204 // For stride > 0 && scale > 0,
2205 // Derive the new pre-loop limit "lim" such that the two constraints:
2206 // (1) lim = lim0 + N (where N is some positive integer < V)
2207 // (2) (e + lim) % V == 0
2208 // are true.
2209 //
2210 // Substituting (1) into (2),
2341 if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2342 return pre_end;
2343 }
2344
2345
2346 //------------------------------init---------------------------
2347 void SuperWord::init() {
2348 _dg.init();
2349 _packset.clear();
2350 _disjoint_ptrs.clear();
2351 _block.clear();
2352 _data_entry.clear();
2353 _mem_slice_head.clear();
2354 _mem_slice_tail.clear();
2355 _node_info.clear();
2356 _align_to_ref = NULL;
2357 _lpt = NULL;
2358 _lp = NULL;
2359 _bb = NULL;
2360 _iv = NULL;
2361 _early_return = false;
2362 }
2363
2364 //------------------------------print_packset---------------------------
2365 void SuperWord::print_packset() {
2366 #ifndef PRODUCT
2367 tty->print_cr("packset");
2368 for (int i = 0; i < _packset.length(); i++) {
2369 tty->print_cr("Pack: %d", i);
2370 Node_List* p = _packset.at(i);
2371 print_pack(p);
2372 }
2373 #endif
2374 }
2375
2376 //------------------------------print_pack---------------------------
2377 void SuperWord::print_pack(Node_List* p) {
2378 for (uint i = 0; i < p->size(); i++) {
2379 print_stmt(p->at(i));
2380 }
2381 }
// Print a single statement: its computed alignment followed by the
// node's standard dump (debug builds only).
void SuperWord::print_stmt(Node* s) {
#ifndef PRODUCT
  tty->print(" align: %d \t", alignment(s));
  s->dump();
#endif
}
2404
2405 //------------------------------blank---------------------------
2406 char* SuperWord::blank(uint depth) {
2407 static char blanks[101];
2408 assert(depth < 101, "too deep");
2409 for (uint i = 0; i < depth; i++) blanks[i] = ' ';
2410 blanks[depth] = '\0';
2411 return blanks;
2412 }
2413
2414
//==============================SWPointer===========================

//----------------------------SWPointer------------------------
// Parse the address expression of 'mem' into the canonical form
//   base + scale*iv [+ invariant] + offset
// On failure the object is left with _base/_adr NULL, i.e. !valid().
// When 'analyze_only' is set, the matcher helpers record non-invariant
// operands on 'nstack' instead of only pattern matching.
SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
  _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(nstack), _analyze_only(analyze_only),
  _stack_idx(0) {

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    // Address is not an AddP chain; cannot decompose it.
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  //unsafe reference could not be aligned appropriately without runtime checking
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }
  // Peel off up to three levels of AddP, folding each level's Offset
  // input into _scale/_offset/_invar via scaled_iv_plus_offset().
  for (int i = 0; i < 3; i++) {
    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    if (base == adr || !adr->is_AddP()) {
      break; // stop looking at addp's
    }
  }
  // Setting _base and _adr non-NULL is what makes this pointer valid().
  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
}
2450
// Following is used to create a temporary object during
// the pattern match of an address expression.
// Inherits the context (_mem, _slp, _nstack, _analyze_only, _stack_idx)
// from 'p' but starts with fresh, empty match state
// (_base/_adr/_scale/_offset/_invar).
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
  _nstack(p->_nstack), _analyze_only(p->_analyze_only),
  _stack_idx(p->_stack_idx) {}
2458
2459 //------------------------scaled_iv_plus_offset--------------------
2460 // Match: k*iv + offset
2461 // where: k is a constant that maybe zero, and
2462 // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
2463 bool SWPointer::scaled_iv_plus_offset(Node* n) {
2464 if (scaled_iv(n)) {
2465 return true;
2466 }
2467 if (offset_plus_k(n)) {
2468 return true;
2469 }
2470 int opc = n->Opcode();
2471 if (opc == Op_AddI) {
2472 if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
2473 return true;
2474 }
2475 if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2476 return true;
2477 }
2480 return true;
2481 }
2482 if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
2483 _scale *= -1;
2484 return true;
2485 }
2486 }
2487 return false;
2488 }
2489
2490 //----------------------------scaled_iv------------------------
2491 // Match: k*iv where k is a constant that's not zero
2492 bool SWPointer::scaled_iv(Node* n) {
2493 if (_scale != 0) {
2494 return false; // already found a scale
2495 }
2496 if (n == iv()) {
2497 _scale = 1;
2498 return true;
2499 }
2500 if (_analyze_only && (invariant(n) == false)) {
2501 _nstack->push(n, _stack_idx++);
2502 }
2503 int opc = n->Opcode();
2504 if (opc == Op_MulI) {
2505 if (n->in(1) == iv() && n->in(2)->is_Con()) {
2506 _scale = n->in(2)->get_int();
2507 return true;
2508 } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
2509 _scale = n->in(1)->get_int();
2510 return true;
2511 }
2512 } else if (opc == Op_LShiftI) {
2513 if (n->in(1) == iv() && n->in(2)->is_Con()) {
2514 _scale = 1 << n->in(2)->get_int();
2515 return true;
2516 }
2517 } else if (opc == Op_ConvI2L) {
2518 if (scaled_iv_plus_offset(n->in(1))) {
2519 return true;
2520 }
2521 } else if (opc == Op_LShiftL) {
2522 if (!has_iv() && _invar == NULL) {
//----------------------------offset_plus_k------------------------
// Match: offset is (k [+/- invariant])
// where k maybe zero and invariant is optional, but not both.
// On a successful match, accumulates k into _offset (negated when
// 'negate' is set) and/or records the invariant in _invar/_negate_invar.
bool SWPointer::offset_plus_k(Node* n, bool negate) {
  int opc = n->Opcode();
  if (opc == Op_ConI) {
    // Plain int constant: fold directly into the byte offset.
    _offset += negate ? -(n->get_int()) : n->get_int();
    return true;
  } else if (opc == Op_ConL) {
    // Okay if value fits into an int
    const TypeLong* t = n->find_long_type();
    if (t->higher_equal(TypeLong::INT)) {
      jlong loff = n->get_long();
      jint off = (jint)loff;
      // NOTE(review): the non-negated arm uses 'loff' (jlong) while the
      // negated arm uses 'off' (jint); presumably both were meant to use
      // 'off'. The value fits in an int here, so the implicit narrowing
      // back to _offset appears benign — confirm and unify.
      _offset += negate ? -off : loff;
      return true;
    }
    return false;
  }
  if (_invar != NULL) return false; // already have an invariant
  if (_analyze_only && (invariant(n) == false)) {
    // In analyze-only mode, remember loop-variant operands for the caller.
    _nstack->push(n, _stack_idx++);
  }
  if (opc == Op_AddI) {
    // invariant + constant (in either operand order)
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      _negate_invar = negate;
      _invar = n->in(1);
      _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      return true;
    } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
      _negate_invar = negate;
      _invar = n->in(2);
      return true;
    }
  }
  if (opc == Op_SubI) {
    // invariant - constant: the constant's sign is flipped relative to AddI.
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      _negate_invar = negate;
      _invar = n->in(1);
      _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      return true;
2582 } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|