diff a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1168,11 +1168,11 @@ P2, P3, P4, P5, P6, - P7, + // P7, non-allocatable, preserved with all elements preset to TRUE. P8, P9, P10, P11, P12, @@ -1189,11 +1189,11 @@ P2, P3, P4, P5, P6, - P7 + // P7, non-allocatable, preserved with all elements preset to TRUE. ); // Singleton class for condition codes reg_class int_flags(RFLAGS); @@ -1896,10 +1896,14 @@ __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); __ bind(L_skip_barrier); } + if (UseSVE > 0 && C->max_vector_size() >= 16) { + __ reinitialize_ptrue(); + } + int bangsize = C->output()->bang_size_in_bytes(); if (C->output()->need_stack_bang(bangsize) && UseStackBanging) __ generate_stack_overflow_check(bangsize); __ build_frame(framesize); @@ -2060,11 +2064,29 @@ int dst_offset = ra_->reg2offset(dst_lo); if (bottom_type()->isa_vect() != NULL) { uint ireg = ideal_reg(); if (ireg == Op_VecA && cbuf) { - Unimplemented(); + C2_MacroAssembler _masm(cbuf); + int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack->stack + __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset, + sve_vector_reg_size_in_bytes); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo), + sve_vector_reg_size_in_bytes); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { + __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo), + sve_vector_reg_size_in_bytes); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + ShouldNotReachHere(); + } } else if (cbuf) { assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); C2_MacroAssembler _masm(cbuf); assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { @@ -2353,24 +2375,33 @@ } // Identify extra cases that we might want to provide match rules for vector nodes and // other intrinsics guarded with vector length (vlen) and element type (bt). const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - if (!match_rule_supported(opcode)) { + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { return false; } - - // Special cases which require vector length - switch (opcode) { - case Op_MulAddVS2VI: { - if (vlen != 4) { + int bit_size = vlen * type2aelembytes(bt) * 8; + if (UseSVE == 0 && bit_size > 128) { + return false; + } + if (UseSVE > 0) { + return op_sve_supported(opcode); + } else { // NEON + // Special cases + switch (opcode) { + case Op_MulAddVS2VI: + if (bit_size < 128) { return false; } break; + case Op_MulVL: + return false; + default: + break; } } - return true; // Per default match rules are supported. } const bool Matcher::has_predicated_vectors(void) { return UseSVE > 0; @@ -2407,11 +2438,12 @@ return true; } // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - int size = MIN2(16, (int)MaxVectorSize); + // The MaxVectorSize should have been set by detecting SVE max vector register size. + int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize); // Minimum 2 values in vector if (size < 2*type2aelembytes(bt)) size = 0; // But never < 4 if (size < 4) size = 0; return size; @@ -3714,20 +3746,27 @@ } } if (call == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; + } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + // Only non uncommon_trap calls need to reinitialize ptrue. + if (uncommon_trap_request() == 0) { + __ reinitialize_ptrue(); + } } %} enc_class aarch64_enc_java_dynamic_call(method meth) %{ C2_MacroAssembler _masm(&cbuf); int method_index = resolved_method_index(cbuf); address call = __ ic_call((address)$meth$$method, method_index); if (call == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; + } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + __ reinitialize_ptrue(); } %} enc_class aarch64_enc_call_epilog() %{ C2_MacroAssembler _masm(&cbuf); @@ -3760,19 +3799,27 @@ __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize))); __ blr(rscratch1); __ bind(retaddr); __ add(sp, sp, 2 * wordSize); } + if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + __ reinitialize_ptrue(); + } %} enc_class aarch64_enc_rethrow() %{ C2_MacroAssembler _masm(&cbuf); __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); %} enc_class aarch64_enc_ret() %{ C2_MacroAssembler _masm(&cbuf); +#ifdef ASSERT + if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + __ verify_ptrue(); + } +#endif __ ret(lr); %} enc_class aarch64_enc_tail_call(iRegP jump_target) %{ C2_MacroAssembler _masm(&cbuf); @@ -4532,10 +4579,45 @@ op_cost(0); format %{ %} interface(CONST_INTER); %} +// 8 bit signed value. +operand immI8() +%{ + predicate(n->get_int() <= 127 && n->get_int() >= -128); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 8 bit signed value (simm8), or #simm8 LSL 8. +operand immI8_shift8() +%{ + predicate((n->get_int() <= 127 && n->get_int() >= -128) || + (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 8 bit signed value (simm8), or #simm8 LSL 8. +operand immL8_shift8() +%{ + predicate((n->get_long() <= 127 && n->get_long() >= -128) || + (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // 32 bit integer valid for add sub immediate operand immIAddSub() %{ predicate(Assembler::operand_valid_for_add_sub_immediate((int64_t)n->get_int())); match(ConI); @@ -16400,11 +16482,11 @@ %} // Load Vector (128 bits) instruct loadV16(vecX dst, vmem16 mem) %{ - predicate(n->as_LoadVector()->memory_size() == 16); + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} ins_encode( aarch64_enc_ldrvQ(dst, mem) ); ins_pipe(vload_reg_mem128); @@ -16456,11 +16538,11 @@ ins_pipe(vdup_reg_reg64); %} instruct replicate16B(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (16B)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); @@ -16481,11 +16563,11 @@ ins_pipe(vmovi_reg_imm64); %} instruct replicate16B_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} ins_encode %{ __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); @@ -16506,11 +16588,11 @@ ins_pipe(vdup_reg_reg64); %} instruct replicate8S(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8S)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); @@ -16531,11 +16613,11 @@ ins_pipe(vmovi_reg_imm64); %} instruct replicate8S_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} ins_encode %{ __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); @@ -16555,11 +16637,11 @@ ins_pipe(vdup_reg_reg64); %} instruct replicate4I(vecX dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4I)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); @@ -16579,11 +16661,11 @@ ins_pipe(vmovi_reg_imm64); %} instruct replicate4I_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(4I)" %} ins_encode %{ __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); @@ -16591,11 +16673,11 @@ ins_pipe(vmovi_reg_imm128); %} instruct replicate2L(vecX dst, iRegL src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2L)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); @@ -16603,11 +16685,11 @@ ins_pipe(vdup_reg_reg128); %} instruct replicate2L_zero(vecX dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); ins_cost(INSN_COST); format %{ "movi $dst, $zero\t# vector(4I)" %} ins_encode %{ __ eor(as_FloatRegister($dst$$reg), __ T16B, @@ -16630,11 +16712,11 @@ ins_pipe(vdup_reg_freg64); %} instruct replicate4F(vecX dst, vRegF src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4F)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T4S, @@ -16643,11 +16725,11 @@ ins_pipe(vdup_reg_freg128); %} instruct replicate2D(vecX dst, vRegD src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2D)" %} ins_encode %{ __ dup(as_FloatRegister($dst$$reg), __ T2D,