src/share/vm/opto/superword.cpp
*** old/src/share/vm/opto/superword.cpp	Sat Jun  2 20:04:20 2012
--- new/src/share/vm/opto/superword.cpp	Sat Jun  2 20:04:20 2012

*** 1,7 ****
--- 1,7 ----
  /*
! * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
*** 65,74 ****
--- 65,78 ----
    _iv(NULL)                               // induction var
  {}

  //------------------------------transform_loop---------------------------
  void SuperWord::transform_loop(IdealLoopTree* lpt) {
+   assert(UseSuperWord, "should be");
+   // Do vectors exist on this architecture?
+   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
    assert(lpt->_head->is_CountedLoop(), "must be");
    CountedLoopNode *cl = lpt->_head->as_CountedLoop();

    if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
*** 87,99 ****
--- 91,100 ----
    CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
    if (pre_end == NULL) return;
    Node *pre_opaq1 = pre_end->limit();
    if (pre_opaq1->Opcode() != Op_Opaque1) return;

    // Do vectors exist on this architecture?
    if (vector_width_in_bytes() == 0) return;

    init(); // initialize data structures
    set_lpt(lpt);
    set_lp(cl);
*** 175,241 ****
--- 176,321 ----
  void SuperWord::find_adjacent_refs() {
    // Get list of memory operations
    Node_List memops;
    for (int i = 0; i < _block.length(); i++) {
      Node* n = _block.at(i);
!     if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && is_java_primitive(n->as_Mem()->memory_type())) {
        int align = memory_alignment(n->as_Mem(), 0);
        if (align != bottom_align) {
          memops.push(n);
        }
      }
    }
    if (memops.size() == 0) return;

    // Find a memory reference to align to. The pre-loop trip count
    // is modified to align this reference to a vector-aligned address
!   find_align_to_ref(memops);
!   if (align_to_ref() == NULL) return;
+   Node_List align_to_refs;
+   const Type* best_vt = NULL;
!   int best_iv_adjustment = 0;
!   MemNode* best_align_to_mem_ref = NULL;

    SWPointer align_to_ref_p(align_to_ref(), this);
    int offset = align_to_ref_p.offset_in_bytes();
!   int scale  = align_to_ref_p.scale_in_bytes();
!   int vw = vector_width_in_bytes();
!   int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
!   int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+   while (memops.size() != 0) {
+     // Find a memory reference to align to.
!     MemNode* mem_ref = find_align_to_ref(memops);
!     if (mem_ref == NULL) break;
!     align_to_refs.push(mem_ref);
!     const Type* vt = velt_type(mem_ref);
+     int iv_adjustment = get_iv_adjustment(mem_ref);

  #ifndef PRODUCT
    if (TraceSuperWord)
      tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d",
                    offset, iv_adjustment, align_to_ref_p.memory_size(),
                    align_to_ref_p.scale_in_bytes(), iv_stride());
  #endif
+     if (best_align_to_mem_ref == NULL) {
+       // Set memory reference which is the best from all memory operations
+       // to be used for alignment. The pre-loop trip count is modified to align
+       // this reference to a vector-aligned address.
+       best_vt = vt;
+       best_align_to_mem_ref = mem_ref;
+       best_iv_adjustment = iv_adjustment;
+     }

    // Set alignment relative to "align_to_ref"
+     SWPointer align_to_ref_p(mem_ref, this);
+     // Set alignment relative to "align_to_ref" for all related memory operations.
      for (int i = memops.size() - 1; i >= 0; i--) {
        MemNode* s = memops.at(i)->as_Mem();
+       if (isomorphic(s, mem_ref)) {
          SWPointer p2(s, this);
          if (p2.comparable(align_to_ref_p)) {
            int align = memory_alignment(s, iv_adjustment);
            set_alignment(s, align);
          } else {
            memops.remove(i);
          }
        }
+     }

!   // Create initial pack pairs of memory operations for which
+     // alignment is set and vectors will be aligned.
+     bool create_pack = true;
+     if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+       if (vt == best_vt) {
+         // Can't allow vectorization of unaligned memory accesses with the
+         // same type since it could be overlapped accesses to the same array.
+         create_pack = false;
+       } else {
+         // Allow independent (different type) unaligned memory operations
+         // if HW supports them.
+         if (!Matcher::misaligned_vectors_ok()) {
+           create_pack = false;
+         } else {
+           // Check if packs of the same memory type but
+           // with a different alignment were created before.
+           for (uint i = 0; i < align_to_refs.size(); i++) {
+             MemNode* mr = align_to_refs.at(i)->as_Mem();
+             if (velt_type(mr) == vt && memory_alignment(mr, iv_adjustment) != 0)
+               create_pack = false;
+           }
+         }
+       }
+     }
+     if (create_pack) {
      for (uint i = 0; i < memops.size(); i++) {
        Node* s1 = memops.at(i);
+       int align = alignment(s1);
+       if (align == top_align) continue;
        for (uint j = 0; j < memops.size(); j++) {
          Node* s2 = memops.at(j);
+         if (alignment(s2) == top_align) continue;
          if (s1 != s2 && are_adjacent_refs(s1, s2)) {
            int align = alignment(s1);
            if (stmts_can_pack(s1, s2, align)) {
              Node_List* pair = new Node_List();
              pair->push(s1);
              pair->push(s2);
              _packset.append(pair);
            }
          }
        }
      }
+     } else { // Don't create unaligned pack
+       // First, remove remaining memory ops of the same type from the list.
+       for (int i = memops.size() - 1; i >= 0; i--) {
+         MemNode* s = memops.at(i)->as_Mem();
+         if (velt_type(s) == vt) {
+           memops.remove(i);
+         }
+       }
+       // Second, removed already constructed packs of the same type.
+       for (int i = _packset.length() - 1; i >= 0; i--) {
+         Node_List* p = _packset.at(i);
+         MemNode* s = p->at(0)->as_Mem();
+         if (velt_type(s) == vt) {
+           remove_pack_at(i);
+         }
+       }
+
+       // If needed find the best memory reference for loop alignment again.
+       if (best_vt == vt) {
+         // Put memory ops from remaining packs back on memops list for
+         // the best alignment search.
+         uint orig_msize = memops.size();
+         for (int i = 0; i < _packset.length(); i++) {
+           Node_List* p = _packset.at(i);
+           MemNode* s = p->at(0)->as_Mem();
+           assert(velt_type(s) != vt, "sanity");
+           memops.push(s);
+         }
+         MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
+         if (best_align_to_mem_ref == NULL) break;
+         best_vt = velt_type(best_align_to_mem_ref);
+         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+         // Restore list.
+         while (memops.size() > orig_msize)
+           (void)memops.pop();
+       }
+     } // unaligned memory accesses
+
+     // Remove used mem nodes
+     for (int i = memops.size() - 1; i >= 0; i--) {
+       MemNode* m = memops.at(i)->as_Mem();
+       if (alignment(m) != top_align) {
+         memops.remove(i);
+       }
+     }
+
+   } // while (memops.size() != 0
+   set_align_to_ref(best_align_to_mem_ref);
+
  #ifndef PRODUCT
    if (TraceSuperWord) {
      tty->print_cr("\nAfter find_adjacent_refs");
      print_packset();
    }
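Note on the create_pack check in the hunk above: an unaligned pack is rejected when the candidate reference has the same vector element type as the best alignment reference, because two references of the same type may address the same array and overlap. A minimal, hypothetical C++ loop (not taken from this webrev) of the shape that motivates the check:

    // Hypothetical example only: the store b[i] and the load b[i + 1] address
    // the same int array with offsets one element apart, so their vector
    // accesses cannot both be aligned; packing them unaligned could overlap.
    #include <cstddef>
    void shift_left(int* b, size_t n) {
      for (size_t i = 0; i + 1 < n; i++) {
        b[i] = b[i + 1];
      }
    }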
*** 244,254 ****
--- 324,334 ----
  //------------------------------find_align_to_ref---------------------------
  // Find a memory reference to align the loop induction variable to.
  // Looks first at stores then at loads, looking for a memory reference
  // with the largest number of references similar to it.
! void SuperWord::find_align_to_ref(Node_List &memops) {
! MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
    GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);

    // Count number of comparable memory ops
    for (uint i = 0; i < memops.size(); i++) {
      MemNode* s1 = memops.at(i)->as_Mem();
*** 268,291 ****
--- 348,379 ----
        }
      }
    }

!   // Find Store (or Load) with the greatest number of "comparable" references,
+   // biggest vector size, smallest data size and smallest iv offset.
    int max_ct        = 0;
+   int max_vw        = 0;
    int max_idx       = -1;
    int min_size      = max_jint;
    int min_iv_offset = max_jint;
    for (uint j = 0; j < memops.size(); j++) {
      MemNode* s = memops.at(j)->as_Mem();
      if (s->is_Store()) {
+       int vw = vector_width_in_bytes(velt_basic_type(s));
+       assert(vw > 1, "sanity");
        SWPointer p(s, this);
        if (cmp_ct.at(j) > max_ct ||
-           cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+           (vw > max_vw ||
+            vw == max_vw &&
+            (data_size(s) < min_size ||
              data_size(s) == min_size &&
!               p.offset_in_bytes() < min_iv_offset)) {
!               (p.offset_in_bytes() < min_iv_offset)))) {
          max_ct = cmp_ct.at(j);
+         max_vw = vw;
          max_idx = j;
          min_size = data_size(s);
          min_iv_offset = p.offset_in_bytes();
        }
      }
*** 293,328 ****
--- 381,430 ----
    // If no stores, look at loads
    if (max_ct == 0) {
      for (uint j = 0; j < memops.size(); j++) {
        MemNode* s = memops.at(j)->as_Mem();
        if (s->is_Load()) {
+         int vw = vector_width_in_bytes(velt_basic_type(s));
+         assert(vw > 1, "sanity");
          SWPointer p(s, this);
          if (cmp_ct.at(j) > max_ct ||
-             cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+             (vw > max_vw ||
+              vw == max_vw &&
+              (data_size(s) < min_size ||
                data_size(s) == min_size &&
!                 p.offset_in_bytes() < min_iv_offset)) {
!                 (p.offset_in_bytes() < min_iv_offset)))) {
            max_ct = cmp_ct.at(j);
+           max_vw = vw;
            max_idx = j;
            min_size = data_size(s);
            min_iv_offset = p.offset_in_bytes();
          }
        }
      }
    }

    if (max_ct > 0)
      set_align_to_ref(memops.at(max_idx)->as_Mem());

  #ifndef PRODUCT
+ #ifdef ASSERT
    if (TraceSuperWord && Verbose) {
      tty->print_cr("\nVector memops after find_align_to_refs");
      for (uint i = 0; i < memops.size(); i++) {
        MemNode* s = memops.at(i)->as_Mem();
        s->dump();
      }
    }
  #endif
+
+   if (max_ct > 0) {
+ #ifdef ASSERT
+     if (TraceSuperWord) {
+       tty->print("\nVector align to node: ");
+       memops.at(max_idx)->as_Mem()->dump();
+     }
+ #endif
+     return memops.at(max_idx)->as_Mem();
+   }
+   return NULL;
  }

  //------------------------------ref_is_alignable---------------------------
  // Can the preloop align the reference to position zero in the vector?
  bool SuperWord::ref_is_alignable(SWPointer& p) {
*** 339,349 ****
--- 441,453 ----
    if (ABS(span) == p.memory_size())
      return true;

    // If initial offset from start of object is computable,
    // compute alignment within the vector.
!   int vw = vector_width_in_bytes();
!   BasicType bt = velt_basic_type(p.mem());
+   int vw = vector_width_in_bytes(bt);
+   assert(vw > 1, "sanity");
    if (vw % span == 0) {
      Node* init_nd = pre_end->init_trip();
      if (init_nd->is_Con() && p.invar() == NULL) {
        int init = init_nd->bottom_type()->is_int()->get_con();
*** 359,368 ****
--- 463,492 ----
      }
    }
    return false;
  }

+ //---------------------------get_iv_adjustment---------------------------
+ // Calculate loop's iv adjustment for this memory ops.
+ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+   SWPointer align_to_ref_p(mem_ref, this);
+   int offset = align_to_ref_p.offset_in_bytes();
+   int scale  = align_to_ref_p.scale_in_bytes();
+   BasicType bt = velt_basic_type(mem_ref);
+   int vw = vector_width_in_bytes(bt);
+   assert(vw > 1, "sanity");
+   int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+   int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+ #ifndef PRODUCT
+   if (TraceSuperWord)
+     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+                   offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+ #endif
+   return iv_adjustment;
+ }
+
  //---------------------------dependence_graph---------------------------
  // Construct dependency graph.
  // Add dependence edges to load/store nodes for memory dependence
  //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
  void SuperWord::dependence_graph() {
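A small standalone sketch of the iv_adjustment arithmetic introduced in get_iv_adjustment above, using assumed values (16-byte vectors of ints, positive stride, a reference starting 4 bytes past a vector boundary) rather than anything produced by a real compilation:

    #include <cstdio>

    int main() {
      // Assumed values: vector_width_in_bytes(T_INT) == 16, scale == 4 (int array),
      // iv_stride == 1, and the reference starts 4 bytes past a 16-byte boundary.
      int vw = 16;
      int offset = 4, scale = 4, iv_stride = 1;
      int stride_sign   = (scale * iv_stride) > 0 ? 1 : -1;
      int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
      // (1 * 16 - 4) % 16 == 12 bytes, i.e. 3 int elements of pre-loop adjustment.
      printf("iv_adjustment = %d bytes\n", iv_adjustment);
      return 0;
    }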
*** 486,498 ****
--- 610,626 ----
  // Can s1 and s2 be in a pack with s1 immediately preceding s2 and
  // s1 aligned at "align"
  bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {

    // Do not use superword for non-primitives
    if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
       (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+   BasicType bt1 = velt_basic_type(s1);
+   BasicType bt2 = velt_basic_type(s2);
+   if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
      return false;
+   if (Matcher::max_vector_size(bt1) < 2) {
+     return false; // No vectors for this type
+   }

    if (isomorphic(s1, s2)) {
      if (independent(s1, s2)) {
        if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
          if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
*** 593,610 ****
--- 721,740 ----
  }

  //------------------------------set_alignment---------------------------
  void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
    set_alignment(s1, align);
+   if (align == top_align || align == bottom_align) {
+     set_alignment(s2, align);
+   } else {
    set_alignment(s2, align + data_size(s1));
+   }
  }

  //------------------------------data_size---------------------------
  int SuperWord::data_size(Node* s) {
!   const Type* t = velt_type(s);
    BasicType bt = t->array_element_basic_type();
    int bsize = type2aelembytes(bt);
!   int bsize = type2aelembytes(velt_basic_type(s));
    assert(bsize != 0, "valid size");
    return bsize;
  }

  //------------------------------extend_packlist---------------------------
*** 629,641 ****
--- 759,771 ----
  }

  //------------------------------follow_use_defs---------------------------
  // Extend the packset by visiting operand definitions of nodes in pack p
  bool SuperWord::follow_use_defs(Node_List* p) {
+   assert(p->size() == 2, "just checking");
    Node* s1 = p->at(0);
    Node* s2 = p->at(1);
    assert(p->size() == 2, "just checking");
    assert(s1->req() == s2->req(), "just checking");
    assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

    if (s1->is_Load()) return false;
*** 716,753 ****
--- 846,889 ----
    uint i2 = 0;
    do {
      for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
      for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
      if (i1 != i2) {
+       if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+         // Further analysis relies on operands position matching.
+         u2->swap_edges(i1, i2);
+       } else {
        return false;
      }
+     }
    } while (i1 < ct);
    return true;
  }

  //------------------------------est_savings---------------------------
  // Estimate the savings from executing s1 and s2 as a pack
  int SuperWord::est_savings(Node* s1, Node* s2) {
!   int save_in = 2 - 1; // 2 operations per instruction in packed form

    // inputs
    for (uint i = 1; i < s1->req(); i++) {
      Node* x1 = s1->in(i);
      Node* x2 = s2->in(i);
      if (x1 != x2) {
        if (are_adjacent_refs(x1, x2)) {
!         save_in += adjacent_profit(x1, x2);
        } else if (!in_packset(x1, x2)) {
!         save_in -= pack_cost(2);
        } else {
!         save_in += unpack_cost(2);
        }
      }
    }

    // uses of result
    uint ct = 0;
+   int save_use = 0;
    for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
      Node* s1_use = s1->fast_out(i);
      for (int j = 0; j < _packset.length(); j++) {
        Node_List* p = _packset.at(j);
        if (p->at(0) == s1_use) {
*** 754,775 ****
--- 890,911 ----
          for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
            Node* s2_use = s2->fast_out(k);
            if (p->at(p->size()-1) == s2_use) {
              ct++;
              if (are_adjacent_refs(s1_use, s2_use)) {
!               save_use += adjacent_profit(s1_use, s2_use);
              }
            }
          }
        }
      }
    }

!   if (ct < s1->outcnt()) save_use += unpack_cost(1);
!   if (ct < s2->outcnt()) save_use += unpack_cost(1);

!   return MAX2(save_in, save_use);
  }

  //------------------------------costs---------------------------
  int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
  int SuperWord::pack_cost(int ct)   { return ct; }
*** 776,805 ****
--- 912,974 ----
  int SuperWord::unpack_cost(int ct) { return ct; }

  //------------------------------combine_packs---------------------------
  // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
  void SuperWord::combine_packs() {
!   bool changed = true;
    do {
+   // Combine packs regardless max vector size.
+   while (changed) {
      changed = false;
      for (int i = 0; i < _packset.length(); i++) {
        Node_List* p1 = _packset.at(i);
        if (p1 == NULL) continue;
        for (int j = 0; j < _packset.length(); j++) {
          Node_List* p2 = _packset.at(j);
          if (p2 == NULL) continue;
+         if (i == j) continue;
          if (p1->at(p1->size()-1) == p2->at(0)) {
            for (uint k = 1; k < p2->size(); k++) {
              p1->push(p2->at(k));
            }
            _packset.at_put(j, NULL);
            changed = true;
          }
        }
      }
!   } while (changed);
+   }
+
+   // Split packs which have size greater then max vector size.
+   for (int i = 0; i < _packset.length(); i++) {
+     Node_List* p1 = _packset.at(i);
+     if (p1 != NULL) {
+       BasicType bt = velt_basic_type(p1->at(0));
+       uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+       assert(is_power_of_2(max_vlen), "sanity");
+       uint psize = p1->size();
+       if (!is_power_of_2(psize)) {
+         // Skip pack which can't be vector.
+         // case1: for(...) { a[i] = i; } elements values are different (i+x)
+         // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
+         _packset.at_put(i, NULL);
+         continue;
+       }
+       if (psize > max_vlen) {
+         Node_List* pack = new Node_List();
+         for (uint j = 0; j < psize; j++) {
+           pack->push(p1->at(j));
+           if (pack->size() >= max_vlen) {
+             assert(is_power_of_2(pack->size()), "sanity");
+             _packset.append(pack);
+             pack = new Node_List();
+           }
+         }
+         _packset.at_put(i, NULL);
+       }
+     }
+   }
+
+   // Compress list.
    for (int i = _packset.length() - 1; i >= 0; i--) {
      Node_List* p1 = _packset.at(i);
      if (p1 == NULL) {
        _packset.remove_at(i);
      }
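The pack-splitting logic added to combine_packs above can be illustrated with a small sketch in plain C++ (std::vector stands in for Node_List; max_vlen is an assumed value, not queried from the Matcher): a combined pack of 8 statements with a maximum vector length of 4 elements is split into two 4-wide packs, while a pack whose size is not a power of two would be dropped.

    #include <cstdio>
    #include <vector>

    static bool is_power_of_2(size_t x) { return x != 0 && (x & (x - 1)) == 0; }

    int main() {
      std::vector<int> pack = {0, 1, 2, 3, 4, 5, 6, 7}; // 8 packed statements
      size_t max_vlen = 4;                              // assumed max elements per vector
      std::vector<std::vector<int>> packset;
      if (is_power_of_2(pack.size())) {                 // non-power-of-2 packs are dropped
        for (size_t j = 0; j < pack.size(); j += max_vlen) {
          packset.push_back(std::vector<int>(pack.begin() + j,
                                             pack.begin() + j + max_vlen));
        }
      }
      printf("split into %zu packs\n", packset.size()); // prints: split into 2 packs
      return 0;
    }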
*** 878,889 ****
--- 1047,1057 ----
  //------------------------------implemented---------------------------
  // Can code be generated for pack p?
  bool SuperWord::implemented(Node_List* p) {
    Node* p0 = p->at(0);
!   int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
    return vopc > 0 && Matcher::has_match_rule(vopc);
!   return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
  }

  //------------------------------profitable---------------------------
  // For pack p, are all operands and all uses (with in the block) vector?
  bool SuperWord::profitable(Node_List* p) {
*** 937,977 ****
--- 1105,1150 ----
        co_locate_pack(_packset.at(i));
      }
    }
  }

  //-------------------------------remove_and_insert-------------------
! //remove "current" from its current position in the memory graph and insert
  //it after the appropriate insertion point (lip or uip)
! // Remove "current" from its current position in the memory graph and insert
+ // it after the appropriate insertion point (lip or uip).
  void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
                                    Node *uip, Unique_Node_List &sched_before) {
    Node* my_mem = current->in(MemNode::Memory);
!   _igvn.hash_delete(current);
    _igvn.hash_delete(my_mem);
!   bool sched_up = sched_before.member(current);
!   // remove current_store from its current position in the memmory graph
    for (DUIterator i = current->outs(); current->has_out(i); i++) {
      Node* use = current->out(i);
      if (use->is_Mem()) {
        assert(use->in(MemNode::Memory) == current, "must be");
        _igvn.hash_delete(use);
        if (use == prev) { // connect prev to my_mem
+         _igvn.hash_delete(use);
          use->set_req(MemNode::Memory, my_mem);
+         _igvn._worklist.push(use);
+         --i; //deleted this edge; rescan position
        } else if (sched_before.member(use)) {
          _igvn.hash_delete(uip);
+         if (!sched_up) { // Will be moved together with current
+           _igvn.hash_delete(use);
            use->set_req(MemNode::Memory, uip);
+           _igvn._worklist.push(use);
+           --i; //deleted this edge; rescan position
+         }
        } else {
          _igvn.hash_delete(lip);
+         if (sched_up) { // Will be moved together with current
+           _igvn.hash_delete(use);
            use->set_req(MemNode::Memory, lip);
          }
          _igvn._worklist.push(use);
          --i; //deleted this edge; rescan position
        }
      }
+   }
+ }

    bool sched_up = sched_before.member(current);
    Node *insert_pt = sched_up ? uip : lip;
    _igvn.hash_delete(insert_pt);

    // all uses of insert_pt's memory state should use current's instead
    for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
      Node* use = insert_pt->out(i);
      if (use->is_Mem()) {
*** 980,1000 ****
--- 1153,1174 ----
        use->set_req(MemNode::Memory, current);
        _igvn._worklist.push(use);
        --i; //deleted this edge; rescan position
      } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
        uint pos; //lip (lower insert point) must be the last one in the memory slice
        _igvn.hash_delete(use);
        for (pos=1; pos < use->req(); pos++) {
          if (use->in(pos) == insert_pt) break;
        }
+       _igvn.hash_delete(use);
        use->set_req(pos, current);
        _igvn._worklist.push(use);
        --i;
      }
    }

    //connect current to insert_pt
+   _igvn.hash_delete(current);
    current->set_req(MemNode::Memory, insert_pt);
    _igvn._worklist.push(current);
  }

  //------------------------------co_locate_pack-----------------------------------
*** 1029,1039 ****
--- 1203,1213 ----
        for (DUIterator i = current->outs(); current->has_out(i); i++) {
          Node* use = current->out(i);
          if (use->is_Mem() && use != previous)
            memops.push(use);
        }
!       if (current == first) break;
        previous = current;
        current  = current->in(MemNode::Memory)->as_Mem();
      }

      // determine which memory operations should be scheduled before the pack
*** 1042,1090 ****
--- 1216,1271 ----
        if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
          for (uint j = 0; j< i; j++) {
            Node *s2 = memops.at(j);
            if (!independent(s1, s2)) {
              if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
!               schedule_before_pack.push(s1); // s1 must be scheduled before
                Node_List* mem_pk = my_pack(s1);
                if (mem_pk != NULL) {
                  for (uint ii = 0; ii < mem_pk->size(); ii++) {
                    Node* s = mem_pk->at(ii); // follow partner
                    if (memops.member(s) && !schedule_before_pack.member(s))
                      schedule_before_pack.push(s);
                  }
                }
+               break;
              }
            }
          }
        }
      }

    MemNode* lower_insert_pt = last;
    Node*    upper_insert_pt = first->in(MemNode::Memory);
+     // Following code moves loads connected to upper_insert_pt below aliased stores.
+     // Collect such loads here and reconnect them back to upper_insert_pt later.
+     memops.clear();
+     for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+       Node* use = upper_insert_pt->out(i);
+       if (!use->is_Store())
+         memops.push(use);
+     }
+
+     MemNode* lower_insert_pt = last;
      previous = last; //previous store in pk
      current  = last->in(MemNode::Memory)->as_Mem();

!     // start scheduling from "last" to "first"
      while (true) {
        assert(in_bb(current), "stay in block");
        assert(in_pack(previous, pk), "previous stays in pack");
        Node* my_mem = current->in(MemNode::Memory);

        if (in_pack(current, pk)) {
          // Forward users of my memory state (except "previous) to my input memory state
          _igvn.hash_delete(current);
          for (DUIterator i = current->outs(); current->has_out(i); i++) {
            Node* use = current->out(i);
            if (use->is_Mem() && use != previous) {
              assert(use->in(MemNode::Memory) == current, "must be");
              _igvn.hash_delete(use);
              if (schedule_before_pack.member(use)) {
                _igvn.hash_delete(upper_insert_pt);
                use->set_req(MemNode::Memory, upper_insert_pt);
              } else {
                _igvn.hash_delete(lower_insert_pt);
                use->set_req(MemNode::Memory, lower_insert_pt);
              }
              _igvn._worklist.push(use);
              --i; // deleted this edge; rescan position
            }
*** 1095,1104 ****
--- 1276,1295 ----
        }
        if (current == first) break;
        current = my_mem->as_Mem();
      } // end while
+
+     // Reconect loads back to upper_insert_pt.
+     for (uint i = 0; i < memops.size(); i++) {
+       Node *ld = memops.at(i);
+       if (ld->in(MemNode::Memory) != upper_insert_pt) {
+         _igvn.hash_delete(ld);
+         ld->set_req(MemNode::Memory, upper_insert_pt);
+         _igvn._worklist.push(ld);
+       }
+     }
    } else if (pk->at(0)->is_Load()) { //load
      // all loads in the pack should have the same memory state. By default,
      // we use the memory state of the last load. However, if any load could
      // not be moved down due to the dependence constraint, we use the memory
      // state of the first load.
*** 1157,1204 ****
--- 1348,1396 ----
      if (p && n == executed_last(p)) {
        uint vlen = p->size();
        Node* vn = NULL;
        Node* low_adr = p->at(0);
        Node* first   = executed_first(p);
        if (n->is_Load()) {
          int   opc = n->Opcode();
+       if (n->is_Load()) {
          Node* ctl = n->in(MemNode::Control);
          Node* mem = first->in(MemNode::Memory);
          Node* adr = low_adr->in(MemNode::Address);
          const TypePtr* atyp = n->adr_type();
!         vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
!         vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
        } else if (n->is_Store()) {
          // Promote value to be stored to vector
          Node* val = vector_opd(p, MemNode::ValueIn);
          int   opc = n->Opcode();
          Node* ctl = n->in(MemNode::Control);
          Node* mem = first->in(MemNode::Memory);
          Node* adr = low_adr->in(MemNode::Address);
          const TypePtr* atyp = n->adr_type();
!         vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
!         vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
        } else if (n->req() == 3) {
          // Promote operands to vector
          Node* in1 = vector_opd(p, 1);
          Node* in2 = vector_opd(p, 2);
!         vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
!         vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
        } else {
          ShouldNotReachHere();
        }
+       assert(vn != NULL, "sanity");
        _phase->_igvn.register_new_node_with_optimizer(vn);
        _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
        for (uint j = 0; j < p->size(); j++) {
          Node* pm = p->at(j);
          _igvn.replace_node(pm, vn);
        }
        _igvn._worklist.push(vn);
+ #ifdef ASSERT
+       if (TraceSuperWord) {
+         tty->print("\nnew Vector node: ");
+         vn->dump();
        }
+ #endif
      }
+   }
  }

  //------------------------------vector_opd---------------------------
  // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
  Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
*** 1215,1228 ****
--- 1407,1420 ----
        break;
      }
    }
    if (same_opd) {
!     if (opd->is_Vector() || opd->is_VectorLoad()) {
!     if (opd->is_Vector() || opd->is_LoadVector()) {
        return opd; // input is matching vector
      }
!     assert(!opd->is_VectorStore(), "such vector is not expected here");
!     assert(!opd->is_StoreVector(), "such vector is not expected here");
      // Convert scalar input to vector with the same number of elements as
      // p0's vector. Use p0's type because size of operand's container in
      // vector should match p0's size regardless operand's size.
      const Type* p0_t = velt_type(p0);
      VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
*** 1231,1250 ****
--- 1423,1442 ----
      _phase->set_ctrl(vn, _phase->get_ctrl(opd));
      return vn;
    }

    // Insert pack operation
!   const Type* p0_t = velt_type(p0);
!   PackNode* pk = PackNode::make(_phase->C, opd, p0_t);
!   BasicType bt = velt_basic_type(p0);
!   PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
    DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

    for (uint i = 1; i < vlen; i++) {
      Node* pi = p->at(i);
      Node* in = pi->in(opd_idx);
      assert(my_pack(in) == NULL, "Should already have been unpacked");
      assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
!     pk->add_opd(i, in);
    }
    _phase->_igvn.register_new_node_with_optimizer(pk);
    _phase->set_ctrl(pk, _phase->get_ctrl(opd));
    return pk;
  }
*** 1282,1302 ****
--- 1474,1493 ----
          // Insert extract operation
          _igvn.hash_delete(def);
          _igvn.hash_delete(use);
          int def_pos = alignment(def) / data_size(def);
          const Type* def_t = velt_type(def);
!         Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
!         Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
          _phase->_igvn.register_new_node_with_optimizer(ex);
          _phase->set_ctrl(ex, _phase->get_ctrl(def));
          use->set_req(idx, ex);
          _igvn._worklist.push(def);
          _igvn._worklist.push(use);
          bb_insert_after(ex, bb_idx(def));
!         set_velt_type(ex, def_t);
!         set_velt_type(ex, velt_type(def));
        }
      }

  //------------------------------is_vector_use---------------------------
  // Is use->in(u_idx) a vector use?
*** 1585,1598 ****
--- 1776,1793 ----
  int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
    SWPointer p(s, this);
    if (!p.valid()) {
      return bottom_align;
    }
+   int vw = vector_width_in_bytes(velt_basic_type(s));
+   if (vw < 2) {
+     return bottom_align; // No vectors for this type
+   }
    int offset  = p.offset_in_bytes();
    offset     += iv_adjust_in_bytes;
!   int off_rem = offset % vector_width_in_bytes();
!   int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
!   int off_rem = offset % vw;
!   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
    return off_mod;
  }

  //---------------------------container_type---------------------------
  // Smallest type containing range of values
*** 1613,1623 ****
--- 1808,1819 ----
  //-------------------------vector_opd_range-----------------------
  // (Start, end] half-open range defining which operands are vector
  void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
    switch (n->Opcode()) {
!   case Op_LoadB:   case Op_LoadUS:
!   case Op_LoadB:   case Op_LoadUB:
+   case Op_LoadS:   case Op_LoadUS:
    case Op_LoadI:   case Op_LoadL:
    case Op_LoadF:   case Op_LoadD:
    case Op_LoadP:
      *start = 0;
      *end   = 0;
*** 1731,1740 ****
--- 1927,1937 ----
    // pre-loop Opaque1 node.
    Node *orig_limit = pre_opaq->original_loop_limit();
    assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");

    SWPointer align_to_ref_p(align_to_ref, this);
+   assert(align_to_ref_p.valid(), "sanity");

    // Given:
    //     lim0 == original pre loop limit
    //     V == v_align (power of 2)
    //     invar == extra invariant piece of the address expression
*** 1783,1796 ****
--- 1980,1995 ----
    //   Solving for lim:
    //     (e - lim0 + N) % V == 0
    //     N = (V - (e - lim0)) % V
    //     lim = lim0 - (V - (e - lim0)) % V

+   int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
+   assert(vw > 1, "sanity");
    int stride   = iv_stride();
    int scale    = align_to_ref_p.scale_in_bytes();
    int elt_size = align_to_ref_p.memory_size();
!   int v_align  = vector_width_in_bytes() / elt_size;
!   int v_align  = vw / elt_size;
    int k        = align_to_ref_p.offset_in_bytes() / elt_size;

    Node *kn   = _igvn.intcon(k);

    Node *e = kn;
*** 1805,1814 ****
--- 2004,2032 ----
    } else {
      e = new (_phase->C, 3) AddINode(e, aref);
    }
    _phase->_igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
+   }
+   if (vw > ObjectAlignmentInBytes) {
+     // incorporate base e +/- base && Mask >>> log2(elt)
+     Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
+     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+     _phase->_igvn.register_new_node_with_optimizer(xbase);
+     Node* masked_xbase  = new (_phase->C, 3) AndXNode(xbase, mask);
+     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+ #ifdef _LP64
+     masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
+     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+ #endif
+     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+     Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+     _phase->_igvn.register_new_node_with_optimizer(bref);
+     _phase->set_ctrl(bref, pre_ctrl);
+     e = new (_phase->C, 3) AddINode(e, bref);
+     _phase->_igvn.register_new_node_with_optimizer(e);
+     _phase->set_ctrl(e, pre_ctrl);
    }

    // compute e +/- lim0
    if (scale < 0) {
      e = new (_phase->C, 3) SubINode(e, lim0);
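The new block above folds the object's base address into the pre-loop limit when the vector width exceeds ObjectAlignmentInBytes, since an 8-byte-aligned base says nothing about 16- or 32-byte alignment. A standalone sketch of that bref term with assumed numbers (32-byte vectors, 4-byte elements, a base address 16 bytes past a 32-byte boundary), not taken from a real object layout:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assumed example values.
      uintptr_t base = 0x7f0000001010;   // array base, 16 bytes past a 32-byte boundary
      int vw        = 32;                // vector width in bytes (256-bit)
      int elt_size  = 4;                 // int elements
      int log2_elt  = 2;                 // exact_log2(elt_size)
      // bref = (base & (vw - 1)) >>> log2(elt): misalignment of the base in elements.
      int bref = (int)((base & (uintptr_t)(vw - 1)) >> log2_elt);
      printf("bref = %d elements\n", bref); // 16 bytes / 4 = 4 elements
      return 0;
    }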
