src/share/vm/opto/superword.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File
*** old/src/share/vm/opto/superword.cpp Sat Jun 2 20:04:20 2012
--- new/src/share/vm/opto/superword.cpp Sat Jun 2 20:04:20 2012
*** 1,7 ****
--- 1,7 ----
/*
! * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 65,74 ****
--- 65,78 ----
_iv(NULL) // induction var
{}
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(UseSuperWord, "should be");
+ // Do vectors exist on this architecture?
+ if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
*** 87,99 ****
--- 91,100 ----
CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
if (pre_end == NULL) return;
Node *pre_opaq1 = pre_end->limit();
if (pre_opaq1->Opcode() != Op_Opaque1) return;
// Do vectors exist on this architecture?
if (vector_width_in_bytes() == 0) return;
init(); // initialize data structures
set_lpt(lpt);
set_lp(cl);
*** 175,241 ****
--- 176,321 ----
void SuperWord::find_adjacent_refs() {
// Get list of memory operations
Node_List memops;
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
! if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
is_java_primitive(n->as_Mem()->memory_type())) {
int align = memory_alignment(n->as_Mem(), 0);
if (align != bottom_align) {
memops.push(n);
}
}
}
if (memops.size() == 0) return;
// Find a memory reference to align to. The pre-loop trip count
// is modified to align this reference to a vector-aligned address
! find_align_to_ref(memops);
! if (align_to_ref() == NULL) return;
+ Node_List align_to_refs;
+ const Type* best_vt = NULL;
! int best_iv_adjustment = 0;
! MemNode* best_align_to_mem_ref = NULL;
SWPointer align_to_ref_p(align_to_ref(), this);
int offset = align_to_ref_p.offset_in_bytes();
! int scale = align_to_ref_p.scale_in_bytes();
! int vw = vector_width_in_bytes();
! int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
! int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+ while (memops.size() != 0) {
+ // Find a memory reference to align to.
! MemNode* mem_ref = find_align_to_ref(memops);
! if (mem_ref == NULL) break;
! align_to_refs.push(mem_ref);
! const Type* vt = velt_type(mem_ref);
+ int iv_adjustment = get_iv_adjustment(mem_ref);
#ifndef PRODUCT
if (TraceSuperWord)
tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d",
offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride());
#endif
+ if (best_align_to_mem_ref == NULL) {
+ // Set memory reference which is the best from all memory operations
+ // to be used for alignment. The pre-loop trip count is modified to align
+ // this reference to a vector-aligned address.
+ best_vt = vt;
+ best_align_to_mem_ref = mem_ref;
+ best_iv_adjustment = iv_adjustment;
+ }
// Set alignment relative to "align_to_ref"
+ SWPointer align_to_ref_p(mem_ref, this);
+ // Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem();
+ if (isomorphic(s, mem_ref)) {
SWPointer p2(s, this);
if (p2.comparable(align_to_ref_p)) {
int align = memory_alignment(s, iv_adjustment);
set_alignment(s, align);
} else {
memops.remove(i);
}
}
+ }
! // Create initial pack pairs of memory operations for which
+ // alignment is set and vectors will be aligned.
+ bool create_pack = true;
+ if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+ if (vt == best_vt) {
+ // Can't allow vectorization of unaligned memory accesses with the
+ // same type since it could be overlapped accesses to the same array.
+ create_pack = false;
+ } else {
+ // Allow independent (different type) unaligned memory operations
+ // if HW supports them.
+ if (!Matcher::misaligned_vectors_ok()) {
+ create_pack = false;
+ } else {
+ // Check if packs of the same memory type but
+ // with a different alignment were created before.
+ for (uint i = 0; i < align_to_refs.size(); i++) {
+ MemNode* mr = align_to_refs.at(i)->as_Mem();
+ if (velt_type(mr) == vt && memory_alignment(mr, iv_adjustment) != 0)
+ create_pack = false;
+ }
+ }
+ }
+ }
+ if (create_pack) {
for (uint i = 0; i < memops.size(); i++) {
Node* s1 = memops.at(i);
+ int align = alignment(s1);
+ if (align == top_align) continue;
for (uint j = 0; j < memops.size(); j++) {
Node* s2 = memops.at(j);
+ if (alignment(s2) == top_align) continue;
if (s1 != s2 && are_adjacent_refs(s1, s2)) {
int align = alignment(s1);
if (stmts_can_pack(s1, s2, align)) {
Node_List* pair = new Node_List();
pair->push(s1);
pair->push(s2);
_packset.append(pair);
}
}
}
}
+ } else { // Don't create unaligned pack
+ // First, remove remaining memory ops of the same type from the list.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (velt_type(s) == vt) {
+ memops.remove(i);
+ }
+ }
+ // Second, remove already constructed packs of the same type.
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ if (velt_type(s) == vt) {
+ remove_pack_at(i);
+ }
+ }
+
+ // If needed find the best memory reference for loop alignment again.
+ if (best_vt == vt) {
+ // Put memory ops from remaining packs back on memops list for
+ // the best alignment search.
+ uint orig_msize = memops.size();
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ assert(velt_type(s) != vt, "sanity");
+ memops.push(s);
+ }
+ MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
+ if (best_align_to_mem_ref == NULL) break;
+ best_vt = velt_type(best_align_to_mem_ref);
+ best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+ // Restore list.
+ while (memops.size() > orig_msize)
+ (void)memops.pop();
+ }
+ } // unaligned memory accesses
+
+ // Remove used mem nodes
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* m = memops.at(i)->as_Mem();
+ if (alignment(m) != top_align) {
+ memops.remove(i);
+ }
+ }
+
+ } // while (memops.size() != 0)
+ set_align_to_ref(best_align_to_mem_ref);
+
#ifndef PRODUCT
if (TraceSuperWord) {
tty->print_cr("\nAfter find_adjacent_refs");
print_packset();
}
*** 244,254 ****
--- 324,334 ----
//------------------------------find_align_to_ref---------------------------
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
! void SuperWord::find_align_to_ref(Node_List &memops) {
! MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
for (uint i = 0; i < memops.size(); i++) {
MemNode* s1 = memops.at(i)->as_Mem();
*** 268,291 ****
--- 348,379 ----
}
}
}
}
! // Find Store (or Load) with the greatest number of "comparable" references,
+ // biggest vector size, smallest data size and smallest iv offset.
int max_ct = 0;
+ int max_vw = 0;
int max_idx = -1;
int min_size = max_jint;
int min_iv_offset = max_jint;
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
data_size(s) == min_size &&
! p.offset_in_bytes() < min_iv_offset)) {
! (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
}
}
*** 293,328 ****
--- 381,430 ----
// If no stores, look at loads
if (max_ct == 0) {
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
data_size(s) == min_size &&
! p.offset_in_bytes() < min_iv_offset)) {
! (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
}
}
}
}
if (max_ct > 0)
set_align_to_ref(memops.at(max_idx)->as_Mem());
#ifndef PRODUCT
+ #ifdef ASSERT
if (TraceSuperWord && Verbose) {
tty->print_cr("\nVector memops after find_align_to_refs");
for (uint i = 0; i < memops.size(); i++) {
MemNode* s = memops.at(i)->as_Mem();
s->dump();
}
}
#endif
+
+ if (max_ct > 0) {
+ #ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("\nVector align to node: ");
+ memops.at(max_idx)->as_Mem()->dump();
+ }
+ #endif
+ return memops.at(max_idx)->as_Mem();
+ }
+ return NULL;
}
//------------------------------ref_is_alignable---------------------------
// Can the preloop align the reference to position zero in the vector?
bool SuperWord::ref_is_alignable(SWPointer& p) {
*** 339,349 ****
--- 441,453 ----
if (ABS(span) == p.memory_size())
return true;
// If initial offset from start of object is computable,
// compute alignment within the vector.
! int vw = vector_width_in_bytes();
! BasicType bt = velt_basic_type(p.mem());
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
if (vw % span == 0) {
Node* init_nd = pre_end->init_trip();
if (init_nd->is_Con() && p.invar() == NULL) {
int init = init_nd->bottom_type()->is_int()->get_con();
*** 359,368 ****
--- 463,492 ----
}
}
return false;
}
+ //---------------------------get_iv_adjustment---------------------------
+ // Calculate the loop's induction variable adjustment for this memory op.
+ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+ SWPointer align_to_ref_p(mem_ref, this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ BasicType bt = velt_basic_type(mem_ref);
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+ #ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+ #endif
+ return iv_adjustment;
+ }
+
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
void SuperWord::dependence_graph() {
*** 486,498 ****
--- 610,626 ----
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
// s1 aligned at "align"
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
// Do not use superword for non-primitives
if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
(s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+ BasicType bt1 = velt_basic_type(s1);
+ BasicType bt2 = velt_basic_type(s2);
+ if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
return false;
+ if (Matcher::max_vector_size(bt1) < 2) {
+ return false; // No vectors for this type
+ }
if (isomorphic(s1, s2)) {
if (independent(s1, s2)) {
if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
*** 593,610 ****
--- 721,740 ----
}
//------------------------------set_alignment---------------------------
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
set_alignment(s1, align);
+ if (align == top_align || align == bottom_align) {
+ set_alignment(s2, align);
+ } else {
set_alignment(s2, align + data_size(s1));
+ }
}
//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
! const Type* t = velt_type(s);
BasicType bt = t->array_element_basic_type();
int bsize = type2aelembytes(bt);
! int bsize = type2aelembytes(velt_basic_type(s));
assert(bsize != 0, "valid size");
return bsize;
}
//------------------------------extend_packlist---------------------------
*** 629,641 ****
--- 759,771 ----
}
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
+ assert(p->size() == 2, "just checking");
Node* s1 = p->at(0);
Node* s2 = p->at(1);
assert(p->size() == 2, "just checking");
assert(s1->req() == s2->req(), "just checking");
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
if (s1->is_Load()) return false;
*** 716,753 ****
--- 846,889 ----
uint i2 = 0;
do {
for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
if (i1 != i2) {
+ if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+ // Further analysis relies on operands position matching.
+ u2->swap_edges(i1, i2);
+ } else {
return false;
}
+ }
} while (i1 < ct);
return true;
}
//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
int SuperWord::est_savings(Node* s1, Node* s2) {
! int save_in = 2 - 1; // 2 operations per instruction in packed form
// inputs
for (uint i = 1; i < s1->req(); i++) {
Node* x1 = s1->in(i);
Node* x2 = s2->in(i);
if (x1 != x2) {
if (are_adjacent_refs(x1, x2)) {
! save_in += adjacent_profit(x1, x2);
} else if (!in_packset(x1, x2)) {
! save_in -= pack_cost(2);
} else {
! save_in += unpack_cost(2);
}
}
}
// uses of result
uint ct = 0;
+ int save_use = 0;
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* s1_use = s1->fast_out(i);
for (int j = 0; j < _packset.length(); j++) {
Node_List* p = _packset.at(j);
if (p->at(0) == s1_use) {
*** 754,775 ****
--- 890,911 ----
for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
Node* s2_use = s2->fast_out(k);
if (p->at(p->size()-1) == s2_use) {
ct++;
if (are_adjacent_refs(s1_use, s2_use)) {
! save_use += adjacent_profit(s1_use, s2_use);
}
}
}
}
}
}
! if (ct < s1->outcnt()) save_use += unpack_cost(1);
! if (ct < s2->outcnt()) save_use += unpack_cost(1);
! return MAX2(save_in, save_use);
}
//------------------------------costs---------------------------
int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
int SuperWord::pack_cost(int ct) { return ct; }
*** 776,805 ****
--- 912,974 ----
int SuperWord::unpack_cost(int ct) { return ct; }
//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
! bool changed = true;
do {
+ // Combine packs regardless max vector size.
+ while (changed) {
changed = false;
for (int i = 0; i < _packset.length(); i++) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) continue;
for (int j = 0; j < _packset.length(); j++) {
Node_List* p2 = _packset.at(j);
if (p2 == NULL) continue;
+ if (i == j) continue;
if (p1->at(p1->size()-1) == p2->at(0)) {
for (uint k = 1; k < p2->size(); k++) {
p1->push(p2->at(k));
}
_packset.at_put(j, NULL);
changed = true;
}
}
}
} while (changed);
+ }
+ // Split packs which have size greater than max vector size.
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 != NULL) {
+ BasicType bt = velt_basic_type(p1->at(0));
+ uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+ assert(is_power_of_2(max_vlen), "sanity");
+ uint psize = p1->size();
+ if (!is_power_of_2(psize)) {
+ // Skip pack which can't be vector.
+ // case1: for(...) { a[i] = i; } elements values are different (i+x)
+ // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
+ _packset.at_put(i, NULL);
+ continue;
+ }
+ if (psize > max_vlen) {
+ Node_List* pack = new Node_List();
+ for (uint j = 0; j < psize; j++) {
+ pack->push(p1->at(j));
+ if (pack->size() >= max_vlen) {
+ assert(is_power_of_2(pack->size()), "sanity");
+ _packset.append(pack);
+ pack = new Node_List();
+ }
+ }
+ _packset.at_put(i, NULL);
+ }
+ }
+ }
+
+ // Compress list.
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) {
_packset.remove_at(i);
}
*** 878,889 ****
--- 1047,1057 ----
//------------------------------implemented---------------------------
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
Node* p0 = p->at(0);
! int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
return vopc > 0 && Matcher::has_match_rule(vopc);
! return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
}
//------------------------------profitable---------------------------
// For pack p, are all operands and all uses (with in the block) vector?
bool SuperWord::profitable(Node_List* p) {
*** 937,977 ****
--- 1105,1150 ----
co_locate_pack(_packset.at(i));
}
}
//-------------------------------remove_and_insert-------------------
! //remove "current" from its current position in the memory graph and insert
//it after the appropriate insertion point (lip or uip)
! // Remove "current" from its current position in the memory graph and insert
+ // it after the appropriate insertion point (lip or uip).
void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
Node *uip, Unique_Node_List &sched_before) {
Node* my_mem = current->in(MemNode::Memory);
! _igvn.hash_delete(current);
_igvn.hash_delete(my_mem);
! bool sched_up = sched_before.member(current);
! // remove current_store from its current position in the memmory graph
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem()) {
assert(use->in(MemNode::Memory) == current, "must be");
_igvn.hash_delete(use);
if (use == prev) { // connect prev to my_mem
+ _igvn.hash_delete(use);
use->set_req(MemNode::Memory, my_mem);
+ _igvn._worklist.push(use);
+ --i; //deleted this edge; rescan position
} else if (sched_before.member(use)) {
_igvn.hash_delete(uip);
+ if (!sched_up) { // Will be moved together with current
+ _igvn.hash_delete(use);
use->set_req(MemNode::Memory, uip);
+ _igvn._worklist.push(use);
+ --i; //deleted this edge; rescan position
+ }
} else {
_igvn.hash_delete(lip);
+ if (sched_up) { // Will be moved together with current
+ _igvn.hash_delete(use);
use->set_req(MemNode::Memory, lip);
}
_igvn._worklist.push(use);
--i; //deleted this edge; rescan position
}
}
+ }
+ }
bool sched_up = sched_before.member(current);
Node *insert_pt = sched_up ? uip : lip;
_igvn.hash_delete(insert_pt);
// all uses of insert_pt's memory state should use current's instead
for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
Node* use = insert_pt->out(i);
if (use->is_Mem()) {
*** 980,1000 ****
--- 1153,1174 ----
use->set_req(MemNode::Memory, current);
_igvn._worklist.push(use);
--i; //deleted this edge; rescan position
} else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
uint pos; //lip (lower insert point) must be the last one in the memory slice
_igvn.hash_delete(use);
for (pos=1; pos < use->req(); pos++) {
if (use->in(pos) == insert_pt) break;
}
+ _igvn.hash_delete(use);
use->set_req(pos, current);
_igvn._worklist.push(use);
--i;
}
}
//connect current to insert_pt
+ _igvn.hash_delete(current);
current->set_req(MemNode::Memory, insert_pt);
_igvn._worklist.push(current);
}
//------------------------------co_locate_pack----------------------------------
*** 1029,1039 ****
--- 1203,1213 ----
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem() && use != previous)
memops.push(use);
}
! if (current == first) break;
previous = current;
current = current->in(MemNode::Memory)->as_Mem();
}
// determine which memory operations should be scheduled before the pack
*** 1042,1090 ****
--- 1216,1271 ----
if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
for (uint j = 0; j< i; j++) {
Node *s2 = memops.at(j);
if (!independent(s1, s2)) {
if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
! schedule_before_pack.push(s1); // s1 must be scheduled before
Node_List* mem_pk = my_pack(s1);
if (mem_pk != NULL) {
for (uint ii = 0; ii < mem_pk->size(); ii++) {
Node* s = mem_pk->at(ii); // follow partner
if (memops.member(s) && !schedule_before_pack.member(s))
schedule_before_pack.push(s);
}
}
+ break;
}
}
}
}
}
MemNode* lower_insert_pt = last;
Node* upper_insert_pt = first->in(MemNode::Memory);
+ // Following code moves loads connected to upper_insert_pt below aliased stores.
+ // Collect such loads here and reconnect them back to upper_insert_pt later.
+ memops.clear();
+ for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+ Node* use = upper_insert_pt->out(i);
+ if (!use->is_Store())
+ memops.push(use);
+ }
+
+ MemNode* lower_insert_pt = last;
previous = last; //previous store in pk
current = last->in(MemNode::Memory)->as_Mem();
! // start scheduling from "last" to "first"
while (true) {
assert(in_bb(current), "stay in block");
assert(in_pack(previous, pk), "previous stays in pack");
Node* my_mem = current->in(MemNode::Memory);
if (in_pack(current, pk)) {
// Forward users of my memory state (except "previous) to my input memory state
_igvn.hash_delete(current);
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem() && use != previous) {
assert(use->in(MemNode::Memory) == current, "must be");
_igvn.hash_delete(use);
if (schedule_before_pack.member(use)) {
_igvn.hash_delete(upper_insert_pt);
use->set_req(MemNode::Memory, upper_insert_pt);
} else {
_igvn.hash_delete(lower_insert_pt);
use->set_req(MemNode::Memory, lower_insert_pt);
}
_igvn._worklist.push(use);
--i; // deleted this edge; rescan position
}
*** 1095,1104 ****
--- 1276,1295 ----
}
if (current == first) break;
current = my_mem->as_Mem();
} // end while
+
+ // Reconnect loads back to upper_insert_pt.
+ for (uint i = 0; i < memops.size(); i++) {
+ Node *ld = memops.at(i);
+ if (ld->in(MemNode::Memory) != upper_insert_pt) {
+ _igvn.hash_delete(ld);
+ ld->set_req(MemNode::Memory, upper_insert_pt);
+ _igvn._worklist.push(ld);
+ }
+ }
} else if (pk->at(0)->is_Load()) { //load
// all loads in the pack should have the same memory state. By default,
// we use the memory state of the last load. However, if any load could
// not be moved down due to the dependence constraint, we use the memory
// state of the first load.
*** 1157,1204 ****
--- 1348,1396 ----
if (p && n == executed_last(p)) {
uint vlen = p->size();
Node* vn = NULL;
Node* low_adr = p->at(0);
Node* first = executed_first(p);
if (n->is_Load()) {
int opc = n->Opcode();
+ if (n->is_Load()) {
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
! vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
! vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
} else if (n->is_Store()) {
// Promote value to be stored to vector
Node* val = vector_opd(p, MemNode::ValueIn);
int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
! vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
! vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
} else if (n->req() == 3) {
// Promote operands to vector
Node* in1 = vector_opd(p, 1);
Node* in2 = vector_opd(p, 2);
! vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
! vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
} else {
ShouldNotReachHere();
}
+ assert(vn != NULL, "sanity");
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
for (uint j = 0; j < p->size(); j++) {
Node* pm = p->at(j);
_igvn.replace_node(pm, vn);
}
_igvn._worklist.push(vn);
+ #ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("\nnew Vector node: ");
+ vn->dump();
}
+ #endif
}
+ }
}
//------------------------------vector_opd---------------------------
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
*** 1215,1228 ****
--- 1407,1420 ----
break;
}
}
if (same_opd) {
! if (opd->is_Vector() || opd->is_VectorLoad()) {
! if (opd->is_Vector() || opd->is_LoadVector()) {
return opd; // input is matching vector
}
! assert(!opd->is_VectorStore(), "such vector is not expected here");
! assert(!opd->is_StoreVector(), "such vector is not expected here");
// Convert scalar input to vector with the same number of elements as
// p0's vector. Use p0's type because size of operand's container in
// vector should match p0's size regardless operand's size.
const Type* p0_t = velt_type(p0);
VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
*** 1231,1250 ****
--- 1423,1442 ----
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
return vn;
}
// Insert pack operation
! const Type* p0_t = velt_type(p0);
! PackNode* pk = PackNode::make(_phase->C, opd, p0_t);
! BasicType bt = velt_basic_type(p0);
! PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
for (uint i = 1; i < vlen; i++) {
Node* pi = p->at(i);
Node* in = pi->in(opd_idx);
assert(my_pack(in) == NULL, "Should already have been unpacked");
assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
! pk->add_opd(i, in);
}
_phase->_igvn.register_new_node_with_optimizer(pk);
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
return pk;
}
*** 1282,1302 ****
--- 1474,1493 ----
// Insert extract operation
_igvn.hash_delete(def);
_igvn.hash_delete(use);
int def_pos = alignment(def) / data_size(def);
const Type* def_t = velt_type(def);
! Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
! Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
_phase->_igvn.register_new_node_with_optimizer(ex);
_phase->set_ctrl(ex, _phase->get_ctrl(def));
use->set_req(idx, ex);
_igvn._worklist.push(def);
_igvn._worklist.push(use);
bb_insert_after(ex, bb_idx(def));
! set_velt_type(ex, def_t);
! set_velt_type(ex, velt_type(def));
}
}
//------------------------------is_vector_use---------------------------
// Is use->in(u_idx) a vector use?
*** 1585,1598 ****
--- 1776,1793 ----
int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
SWPointer p(s, this);
if (!p.valid()) {
return bottom_align;
}
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ if (vw < 2) {
+ return bottom_align; // No vectors for this type
+ }
int offset = p.offset_in_bytes();
offset += iv_adjust_in_bytes;
! int off_rem = offset % vector_width_in_bytes();
! int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
! int off_rem = offset % vw;
! int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
return off_mod;
}
//---------------------------container_type---------------------------
// Smallest type containing range of values
*** 1613,1623 ****
--- 1808,1819 ----
//-------------------------vector_opd_range-----------------------
// (Start, end] half-open range defining which operands are vector
void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
switch (n->Opcode()) {
! case Op_LoadB: case Op_LoadUS:
! case Op_LoadB: case Op_LoadUB:
+ case Op_LoadS: case Op_LoadUS:
case Op_LoadI: case Op_LoadL:
case Op_LoadF: case Op_LoadD:
case Op_LoadP:
*start = 0;
*end = 0;
*** 1731,1740 ****
--- 1927,1937 ----
// pre-loop Opaque1 node.
Node *orig_limit = pre_opaq->original_loop_limit();
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
SWPointer align_to_ref_p(align_to_ref, this);
+ assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
// V == v_align (power of 2)
// invar == extra invariant piece of the address expression
*** 1783,1796 ****
--- 1980,1995 ----
// Solving for lim:
// (e - lim0 + N) % V == 0
// N = (V - (e - lim0)) % V
// lim = lim0 - (V - (e - lim0)) % V
+ int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
+ assert(vw > 1, "sanity");
int stride = iv_stride();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
! int v_align = vector_width_in_bytes() / elt_size;
! int v_align = vw / elt_size;
int k = align_to_ref_p.offset_in_bytes() / elt_size;
Node *kn = _igvn.intcon(k);
Node *e = kn;
*** 1805,1814 ****
--- 2004,2032 ----
} else {
e = new (_phase->C, 3) AddINode(e, aref);
}
_phase->_igvn.register_new_node_with_optimizer(e);
_phase->set_ctrl(e, pre_ctrl);
+ }
+ if (vw > ObjectAlignmentInBytes) {
+ // incorporate base e +/- base && Mask >>> log2(elt)
+ Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
+ Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+ _phase->_igvn.register_new_node_with_optimizer(xbase);
+ Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+ #ifdef _LP64
+ masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+ #endif
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(bref);
+ _phase->set_ctrl(bref, pre_ctrl);
+ e = new (_phase->C, 3) AddINode(e, bref);
+ _phase->_igvn.register_new_node_with_optimizer(e);
+ _phase->set_ctrl(e, pre_ctrl);
}
// compute e +/- lim0
if (scale < 0) {
e = new (_phase->C, 3) SubINode(e, lim0);
src/share/vm/opto/superword.cpp
Index
Unified diffs
Context diffs
Sdiffs
Wdiffs
Patch
New
Old
Previous File
Next File