src/hotspot/share/opto/superword.cpp

*** 643,652 **** --- 643,656 ----
        } else { // Check if packs of the same memory type but
                 // with a different alignment were created before.
          for (uint i = 0; i < align_to_refs.size(); i++) {
            MemNode* mr = align_to_refs.at(i)->as_Mem();
+           if (mr == mem_ref) {
+             // Skip when we are looking at the same memory operation.
+             continue;
+           }
            if (same_velt_type(mr, mem_ref) &&
                memory_alignment(mr, iv_adjustment) != 0)
              create_pack = false;
          }
        }

*** 844,853 **** --- 848,878 ----
        return memops.at(max_idx)->as_Mem();
      }
      return NULL;
  }
  
+ //------------------span_works_for_memory_size-----------------------------
+ static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
+   bool span_matches_memory = false;
+   if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
+       && ABS(span) == type2aelembytes(T_INT)) {
+     // There is a mismatch on span size compared to memory.
+     for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
+       Node* use = mem->fast_out(j);
+       if (!VectorNode::is_type_transition_to_int(use)) {
+         return false;
+       }
+     }
+     // If all uses transition to integer, it means that we can successfully align even on mismatch.
+     return true;
+   } else {
+     span_matches_memory = ABS(span) == mem_size;
+   }
+   return span_matches_memory && (ABS(offset) % mem_size) == 0;
+ }
+ 
  //------------------------------ref_is_alignable---------------------------
  // Can the preloop align the reference to position zero in the vector?
  bool SuperWord::ref_is_alignable(SWPointer& p) {
    if (!p.has_iv()) {
      return true;   // no induction variable

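To see the arithmetic in isolation: a minimal standalone model of the new check, where the all_uses_widen_to_int flag stands in for the real DU-iterator walk over the load's uses with VectorNode::is_type_transition_to_int, and 1, 2 and 4 are the byte sizes of T_BYTE, T_SHORT and T_INT.

    #include <cstdlib>

    // Sketch of span_works_for_memory_size outside of C2. The flag replaces
    // the real scan of the memory node's uses.
    static bool span_ok(int span, int mem_size, int offset,
                        bool all_uses_widen_to_int) {
      if ((mem_size == 1 || mem_size == 2) && std::abs(span) == 4) {
        // Sub-int access advancing by an int-sized span per iteration:
        // alignable only if every use widens the loaded value to int.
        return all_uses_widen_to_int;
      }
      // Ordinary case: the span must match the access size and the offset
      // must be a multiple of it.
      return std::abs(span) == mem_size && (std::abs(offset) % mem_size) == 0;
    }

For example, a byte load (mem_size == 1) with span 4 passes only when all its uses are int transitions, while an int load still needs span 4 and a 4-byte-aligned offset.
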
*** 860,870 ****
    int span = preloop_stride * p.scale_in_bytes();
    int mem_size = p.memory_size();
    int offset = p.offset_in_bytes();
    // Stride one accesses are alignable if offset is aligned to memory operation size.
    // Offset can be unaligned when UseUnalignedAccesses is used.
!   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
      return true;
    }
    // If the initial offset from start of the object is computable,
    // check if the pre-loop can align the final offset accordingly.
    //
--- 885,895 ----
    int span = preloop_stride * p.scale_in_bytes();
    int mem_size = p.memory_size();
    int offset = p.offset_in_bytes();
    // Stride one accesses are alignable if offset is aligned to memory operation size.
    // Offset can be unaligned when UseUnalignedAccesses is used.
!   if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
      return true;
    }
    // If the initial offset from start of the object is computable,
    // check if the pre-loop can align the final offset accordingly.
    //

*** 913,931 ****
        return (init_offset % vw) == 0;
      }
    }
    return false;
  }
  
  //---------------------------get_iv_adjustment---------------------------
  // Calculate loop's iv adjustment for this memory op.
  int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
    SWPointer align_to_ref_p(mem_ref, this, NULL, false);
    int offset = align_to_ref_p.offset_in_bytes();
    int scale  = align_to_ref_p.scale_in_bytes();
    int elt_size = align_to_ref_p.memory_size();
!   int vw       = vector_width_in_bytes(mem_ref);
    assert(vw > 1, "sanity");
    int iv_adjustment;
    if (scale != 0) {
      int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
      // At least one iteration is executed in pre-loop by default. As result
--- 938,978 ----
        return (init_offset % vw) == 0;
      }
    }
    return false;
  }
  
+ //---------------------------get_vw_bytes_special------------------------
+ int SuperWord::get_vw_bytes_special(MemNode* s) {
+   // Get the vector width in bytes.
+   int vw = vector_width_in_bytes(s);
+ 
+   // Check for the special case where the only users are MulAddS2I nodes,
+   // in which case the adjacent short vectors need to be combined.
+   BasicType btype = velt_basic_type(s);
+   if (type2aelembytes(btype) == 2) {
+     bool should_combine_adjacent = true;
+     for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+       Node* user = s->fast_out(i);
+       if (!VectorNode::is_muladds2i(user)) {
+         should_combine_adjacent = false;
+       }
+     }
+     if (should_combine_adjacent) {
+       vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
+     }
+   }
+ 
+   return vw;
+ }
  
  //---------------------------get_iv_adjustment---------------------------
  // Calculate loop's iv adjustment for this memory op.
  int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
    SWPointer align_to_ref_p(mem_ref, this, NULL, false);
    int offset = align_to_ref_p.offset_in_bytes();
    int scale  = align_to_ref_p.scale_in_bytes();
    int elt_size = align_to_ref_p.memory_size();
!   int vw       = get_vw_bytes_special(mem_ref);
    assert(vw > 1, "sanity");
    int iv_adjustment;
    if (scale != 0) {
      int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
      // At least one iteration is executed in pre-loop by default. As result

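The effect of the widening is easy to check with concrete numbers. A hedged sketch of the computation, where max_vector_bytes stands in for Matcher::max_vector_size(btype) * type2aelembytes(btype) and the result of the use scan is passed in as a flag:

    #include <algorithm>

    // Model of get_vw_bytes_special: a 2-byte (short) access whose every use
    // is a MulAddS2I is treated as twice as wide, because two short vectors
    // are consumed for each int vector produced; capped at the platform max.
    static int vw_special(int vw, int elem_bytes,
                          bool all_uses_are_muladds2i, int max_vector_bytes) {
      if (elem_bytes == 2 && all_uses_are_muladds2i) {
        return std::min(max_vector_bytes, vw * 2);
      }
      return vw;
    }

With 16-byte short vectors and a 32-byte platform limit, vw_special(16, 2, true, 32) yields 32; the widened width then flows into both get_iv_adjustment here and memory_alignment in the last hunk.
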
*** 2301,2310 **** --- 2348,2363 ----
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
+     } else if (VectorNode::is_muladds2i(n)) {
+       assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
+       Node* in1 = vector_opd(p, 1);
+       Node* in2 = vector_opd(p, 2);
+       vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
+       vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (n->req() == 3 && !is_cmov_pack(p)) {
        // Promote operands to vector
        Node* in1 = NULL;
        bool node_isa_reduction = n->is_reduction();
        if (node_isa_reduction) {

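For context on the new branch: MulAddS2I has four short data inputs plus the control input, hence req() == 5, and yields a single int. A sketch of its scalar semantics (on x86 a pack of these typically maps to pmaddwd/vpmaddwd):

    // Scalar equivalent of one MulAddS2I node: (in1 * in2) + (in3 * in4),
    // all four inputs shorts, result an int.
    static int mul_add_s2i(short in1, short in2, short in3, short in4) {
      return in1 * in2 + in3 * in4;
    }
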
*** 2613,2622 **** --- 2666,2685 ----
        NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
        return NULL;
      }
      assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
      pk->add_opd(in);
+     if (VectorNode::is_muladds2i(pi)) {
+       Node* in2 = pi->in(opd_idx + 2);
+       assert(my_pack(in2) == NULL, "Should already have been unpacked");
+       if (my_pack(in2) != NULL) {
+         NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
+         return NULL;
+       }
+       assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
+       pk->add_opd(in2);
+     }
    }
    _igvn.register_new_node_with_optimizer(pk);
    _phase->set_ctrl(pk, _phase->get_ctrl(opd));
  #ifdef ASSERT
    if (TraceNewVectors) {

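The in(opd_idx + 2) pairing above is easier to see in a toy model; a sketch assuming a hypothetical Scalar struct in place of a C2 node, with inputs numbered 1..4 as in the node:

    #include <vector>

    // For MulAddS2I, the scalar input at position idx (1 or 2) and the one
    // at idx + 2 are adjacent shorts belonging to the same vector operand,
    // so vector_opd appends both; the operand vector ends up with twice as
    // many lanes as the pack has members.
    struct Scalar { int in[5]; };  // in[1..4] = (in1, in2, in3, in4)

    static std::vector<int> gather_operand(const std::vector<Scalar>& pack,
                                           int idx) {
      std::vector<int> lanes;
      for (const Scalar& s : pack) {
        lanes.push_back(s.in[idx]);      // first short of the pair
        lanes.push_back(s.in[idx + 2]);  // its adjacent partner
      }
      return lanes;
    }
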
*** 2690,2699 **** --- 2753,2777 ----
      for (uint i = 1; i < u_pk->size(); i++) {
        if (u_pk->at(i)->in(u_idx) != n) return false;
      }
      return true;
    }
+   if (VectorNode::is_muladds2i(use)) {
+     // MulAddS2I takes shorts and produces ints - hence the special checks
+     // on alignment and size.
+     if (u_pk->size() * 2 != d_pk->size()) {
+       return false;
+     }
+     for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
+       Node* ui = u_pk->at(i);
+       Node* di = d_pk->at(i);
+       if (alignment(ui) != alignment(di) * 2) {
+         return false;
+       }
+     }
+     return true;
+   }
    if (u_pk->size() != d_pk->size())
      return false;
    for (uint i = 0; i < u_pk->size(); i++) {
      Node* ui = u_pk->at(i);
      Node* di = d_pk->at(i);

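The size and alignment relations enforced by the new branch follow from the 2:1 lane ratio: each int produced consumes two adjacent shorts, so the use pack holds half as many nodes and each of its byte alignments is double the matching def alignment. A standalone model of the check:

    #include <cstddef>

    // Model of the MulAddS2I case in opnd_positions_match: d_align/u_align
    // hold the byte alignments of the def (short) and use (int) packs.
    static bool positions_match(const int* d_align, size_t d_size,
                                const int* u_align, size_t u_size) {
      if (u_size * 2 != d_size) return false;  // two shorts per int
      for (size_t i = 0; i < u_size; i++) {
        if (u_align[i] != d_align[i] * 2) return false;
      }
      return true;
    }

For instance, short defs at alignments {0, 2, 4, 6} feeding int uses at alignments {0, 4} pass the check.
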
*** 3015,3025 ****
    SWPointer p(s, this, NULL, false);
    if (!p.valid()) {
      NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
      return bottom_align;
    }
!   int vw = vector_width_in_bytes(s);
    if (vw < 2) {
      NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
      return bottom_align; // No vectors for this type
    }
    int offset = p.offset_in_bytes();
--- 3093,3103 ----
    SWPointer p(s, this, NULL, false);
    if (!p.valid()) {
      NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
      return bottom_align;
    }
!   int vw = get_vw_bytes_special(s);
    if (vw < 2) {
      NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
      return bottom_align; // No vectors for this type
    }
    int offset = p.offset_in_bytes();