< prev index next >
src/hotspot/share/opto/superword.cpp
Print this page
@@ -643,10 +643,14 @@
} else {
// Check if packs of the same memory type but
// with a different alignment were created before.
for (uint i = 0; i < align_to_refs.size(); i++) {
MemNode* mr = align_to_refs.at(i)->as_Mem();
+ if (mr == mem_ref) {
+ // Skip when we are looking at the same memory operation.
+ continue;
+ }
if (same_velt_type(mr, mem_ref) &&
memory_alignment(mr, iv_adjustment) != 0)
create_pack = false;
}
}
@@ -844,10 +848,31 @@
return memops.at(max_idx)->as_Mem();
}
return NULL;
}
+//------------------span_works_for_memory_size-----------------------------
+static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) { // True if 'span' is acceptable for aligning 'mem' given its access size.
+ bool span_matches_memory = false;
+ if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
+ && ABS(span) == type2aelembytes(T_INT)) {
+ // Byte- or short-sized access whose span has the size of an int: the span does not match the memory size.
+ for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
+ Node* use = mem->fast_out(j);
+ if (!VectorNode::is_type_transition_to_int(use)) {
+ return false; // A single use that is not a type transition to int rejects the mismatch.
+ }
+ }
+ // If all uses transition to integer, we can successfully align even on mismatch. Note: 'offset' is not checked on this path.
+ return true;
+ }
+ else {
+ span_matches_memory = ABS(span) == mem_size;
+ }
+ return span_matches_memory && (ABS(offset) % mem_size) == 0; // Regular case: span equals the access size and offset is a multiple of it.
+}
+
//------------------------------ref_is_alignable---------------------------
// Can the preloop align the reference to position zero in the vector?
bool SuperWord::ref_is_alignable(SWPointer& p) {
if (!p.has_iv()) {
return true; // no induction variable
@@ -860,11 +885,11 @@
int span = preloop_stride * p.scale_in_bytes();
int mem_size = p.memory_size();
int offset = p.offset_in_bytes();
// Stride one accesses are alignable if offset is aligned to memory operation size.
// Offset can be unaligned when UseUnalignedAccesses is used.
- if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
+ if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
return true;
}
// If the initial offset from start of the object is computable,
// check if the pre-loop can align the final offset accordingly.
//
@@ -913,19 +938,41 @@
return (init_offset % vw) == 0;
}
}
return false;
}
+//---------------------------get_vw_bytes_special------------------------
+int SuperWord::get_vw_bytes_special(MemNode* s) { // Vector width in bytes for 's', possibly widened when all of its users are MulAddS2I.
+ // Start from the regular vector width in bytes for this memory operation.
+ int vw = vector_width_in_bytes(s);
+
+ // Special case: when every user of a 2-byte element memory op is a MulAddS2I, adjacent short vectors will need to be combined.
+ BasicType btype = velt_basic_type(s);
+ if (type2aelembytes(btype) == 2) {
+ bool should_combine_adjacent = true; // Stays true unless a non-MulAddS2I user is found (also when 's' has no users at all).
+ for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+ Node* user = s->fast_out(i);
+ if (!VectorNode::is_muladds2i(user)) {
+ should_combine_adjacent = false;
+ }
+ }
+ if (should_combine_adjacent) {
+ vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2); // Double the width, but no more than max_vector_size(btype) * element size.
+ }
+ }
+
+ return vw;
+}
//---------------------------get_iv_adjustment---------------------------
// Calculate loop's iv adjustment for this memory ops.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
SWPointer align_to_ref_p(mem_ref, this, NULL, false);
int offset = align_to_ref_p.offset_in_bytes();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
- int vw = vector_width_in_bytes(mem_ref);
+ int vw = get_vw_bytes_special(mem_ref);
assert(vw > 1, "sanity");
int iv_adjustment;
if (scale != 0) {
int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
// At least one iteration is executed in pre-loop by default. As result
@@ -2301,10 +2348,16 @@
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
vlen_in_bytes = vn->as_StoreVector()->memory_size();
+ } else if (VectorNode::is_muladds2i(n)) {
+ assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
+ Node* in1 = vector_opd(p, 1);
+ Node* in2 = vector_opd(p, 2);
+ vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
+ vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else if (n->req() == 3 && !is_cmov_pack(p)) {
// Promote operands to vector
Node* in1 = NULL;
bool node_isa_reduction = n->is_reduction();
if (node_isa_reduction) {
@@ -2613,10 +2666,20 @@
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
return NULL;
}
assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
pk->add_opd(in);
+ if (VectorNode::is_muladds2i(pi)) {
+ Node* in2 = pi->in(opd_idx + 2);
+ assert(my_pack(in2) == NULL, "Should already have been unpacked");
+ if (my_pack(in2) != NULL) {
+ NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
+ return NULL;
+ }
+ assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
+ pk->add_opd(in2);
+ }
}
_igvn.register_new_node_with_optimizer(pk);
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
#ifdef ASSERT
if (TraceNewVectors) {
@@ -2690,10 +2753,25 @@
for (uint i = 1; i < u_pk->size(); i++) {
if (u_pk->at(i)->in(u_idx) != n) return false;
}
return true;
}
+ if (VectorNode::is_muladds2i(use)) {
+ // MulAddS2I takes shorts and produces ints - hence the special checks
+ // on alignment and size.
+ if (u_pk->size() * 2 != d_pk->size()) {
+ return false;
+ }
+ for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
+ Node* ui = u_pk->at(i);
+ Node* di = d_pk->at(i);
+ if (alignment(ui) != alignment(di) * 2) {
+ return false;
+ }
+ }
+ return true;
+ }
if (u_pk->size() != d_pk->size())
return false;
for (uint i = 0; i < u_pk->size(); i++) {
Node* ui = u_pk->at(i);
Node* di = d_pk->at(i);
@@ -3015,11 +3093,11 @@
SWPointer p(s, this, NULL, false);
if (!p.valid()) {
NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
return bottom_align;
}
- int vw = vector_width_in_bytes(s);
+ int vw = get_vw_bytes_special(s);
if (vw < 2) {
NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
return bottom_align; // No vectors for this type
}
int offset = p.offset_in_bytes();
< prev index next >