--- old/src/hotspot/cpu/aarch64/aarch64.ad 2020-07-23 15:51:17.686877628 +0800 +++ new/src/hotspot/cpu/aarch64/aarch64.ad 2020-07-23 15:51:17.286873459 +0800 @@ -2079,7 +2079,7 @@ // Identify extra cases that we might want to provide match rules for vector nodes and // other intrinsics guarded with vector length (vlen) and element type (bt). const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - if (!match_rule_supported(opcode)) { + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { return false; } @@ -2091,6 +2091,12 @@ } break; } + case Op_VectorLoadShuffle: + case Op_VectorRearrange: + if (vlen < 4) { + return false; + } + break; } return true; // Per default match rules are supported. @@ -2100,6 +2106,10 @@ return false; } +bool Matcher::supports_vector_variable_shifts(void) { + return true; +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -2146,15 +2156,26 @@ return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { -// For the moment limit the vector size to 8 bytes - int size = 8 / type2aelembytes(bt); - if (size < 2) size = 2; - return size; + int max_size = max_vector_size(bt); + // Limit the vector size to 8 bytes + int size = 8 / type2aelembytes(bt); + if (bt == T_BYTE) { + // To support vector api shuffle/rearrange. + size = 4; + } else if (bt == T_BOOLEAN) { + // To support vector api load/store mask. + size = 2; + } + if (size < 2) size = 2; + return MIN2(size,max_size); } // Vector ideal reg. const uint Matcher::vector_ideal_reg(int len) { switch(len) { + // For 16-bit/32-bit mask vector, reuse VecD. + case 2: + case 4: case 8: return Op_VecD; case 16: return Op_VecX; } @@ -2797,6 +2818,12 @@ // END Non-volatile memory access // Vector loads and stores + enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, @@ -2815,6 +2842,12 @@ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_strvH(vecD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, @@ -3899,6 +3932,26 @@ interface(CONST_INTER); %} +operand immI_2() +%{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_4() +%{ + predicate(n->get_int() == 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_8() %{ predicate(n->get_int() == 8); @@ -10801,6 +10854,7 @@ %} // BEGIN This section of the file is automatically generated. Do not edit -------------- +// This section is generated from aarch64_ad.m4 instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1, @@ -15991,6 +16045,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP tmp, TEMP tmp2); @@ -16010,6 +16065,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP vtmp, TEMP itmp); @@ -16028,6 +16084,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP tmp, TEMP dst); @@ -16047,6 +16104,7 @@ instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP vtmp, TEMP itmp, TEMP dst); @@ -17128,8 +17186,7 @@ ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (2S)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp64); %} @@ -17141,8 +17198,7 @@ ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (4S)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp128); %} @@ -17154,8 +17210,7 @@ ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (2D)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp128); %} @@ -17296,7 +17351,8 @@ // ------------------------------ Shift --------------------------------------- instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 8); + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "dup $dst, $cnt\t# shift count vector (8B)" %} @@ -18120,12 +18176,12 @@ "uaddlp $dst, $dst\t# vector (8H)" %} ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($dst$$reg)); + __ cnt(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_class_default); %} @@ -18139,12 +18195,12 @@ "uaddlp $dst, $dst\t# vector (4H)" %} ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($dst$$reg)); + __ cnt(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_class_default); %}