1 //
   2 // Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2020, Arm Ltd. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
  27 
  28 // AArch64 SVE Architecture Description File
  29 
  30 
  31 // 4 bit signed offset -- for predicated load/store
  32 
// Int constant usable as the scaled immediate of an SVE predicated
// load/store: a 4-bit signed multiple of the vector register length
// (the "[$reg, #imm, MUL VL]" form used by vmemA_indOffI4 below).
operand vmemA_immIOffset4()
%{
  // Accept only if the constant is a valid 4-bit immediate for the current
  // SVE vector length -- see Address::offset_ok_for_sve_immed.
  predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
            Matcher::scalable_vector_reg_size(T_BYTE)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
  43 
// Long constant counterpart of vmemA_immIOffset4: 4-bit signed multiple of
// the vector register length, for 64-bit address arithmetic.
operand vmemA_immLOffset4()
%{
  // Same validity test as the int form, applied to the long constant.
  predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
            Matcher::scalable_vector_reg_size(T_BYTE)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
  54 
  55 
// Memory operand: base register plus a 4-bit signed int offset scaled by the
// vector length ("MUL VL" addressing for SVE predicated load/store).
operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off, MUL VL]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    // 0xffffffff marks "no index register" (aarch64.ad convention).
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
  69 
// Memory operand: base register plus a 4-bit signed long offset scaled by
// the vector length. Long-offset twin of vmemA_indOffI4.
operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off, MUL VL]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    // 0xffffffff marks "no index register" (aarch64.ad convention).
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
  83 
// Addressing modes accepted by the SVE predicated load/store patterns:
// plain [reg] plus the int/long "[reg, #imm4, MUL VL]" forms above.
opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  85 
// Declarations copied by ADLC into the generated header.
source_hpp %{
  // Returns whether C2 vector opcode `opcode` has an SVE implementation here.
  bool op_sve_supported(int opcode);
%}
  89 
  90 source %{
  91 
  92   static inline BasicType vector_element_basic_type(const MachNode* n) {
  93     const TypeVect* vt = n->bottom_type()->is_vect();
  94     return vt->element_basic_type();
  95   }
  96 
  97   static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
  98     int def_idx = use->operand_index(opnd);
  99     Node* def = use->in(def_idx);
 100     const TypeVect* vt = def->bottom_type()->is_vect();
 101     return vt->element_basic_type();
 102   }
 103 
  // Member-function-pointer type matching C2_MacroAssembler's predicated SVE
  // load/store emitters (sve_ld1b/h/w/d and sve_st1b/h/w/d).
  typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
                                                             PRegister Pg, const Address &adr);

  // Predicated load/store, with optional ptrue to all elements of given predicate register.
  // Emits one SVE predicated load (is_store == false) or store (is_store == true)
  // of vector register `reg`, governed by predicate `pg`. The ld1/st1 variant and
  // element width are chosen from type2aelembytes(bt). Only the base +
  // immediate-offset addressing mode (index == -1, size == 0) is implemented:
  // the byte displacement is converted to a vector-length multiple ("MUL VL")
  // by dividing by the SVE register size. Any other mode asserts.
  // NOTE(review): `masm` is taken by value, so every call copies the assembler
  // object; a reference/pointer would avoid that -- confirm the copy is benign.
  static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
                                   FloatRegister reg, PRegister pg, BasicType bt,
                                   int opcode, Register base, int index, int size, int disp) {
    sve_mem_insn_predicate insn;
    Assembler::SIMD_RegVariant type;
    int esize = type2aelembytes(bt);
    if (index == -1) {
      assert(size == 0, "unsupported address mode: scale size = %d", size);
      switch(esize) {
      case 1:
        insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
        type = Assembler::B;
        break;
      case 2:
        insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
        type = Assembler::H;
        break;
      case 4:
        insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
        type = Assembler::S;
        break;
      case 8:
        insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
        type = Assembler::D;
        break;
      default:
        assert(false, "unsupported");
        ShouldNotReachHere();
      }
      // Immediate is in units of the vector register length (MUL VL).
      (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
    } else {
      // Register-indexed forms are not generated by the patterns in this file.
      assert(false, "unimplemented");
      ShouldNotReachHere();
    }
  }
 143 
 144   bool op_sve_supported(int opcode) {
 145     switch (opcode) {
 146       case Op_MulAddVS2VI:
 147         // No multiply reduction instructions
 148       case Op_MulReductionVD:
 149       case Op_MulReductionVF:
 150       case Op_MulReductionVI:
 151       case Op_MulReductionVL:
 152         // Others
 153       case Op_Extract:
 154       case Op_ExtractB:
 155       case Op_ExtractC:
 156       case Op_ExtractD:
 157       case Op_ExtractF:
 158       case Op_ExtractI:
 159       case Op_ExtractL:
 160       case Op_ExtractS:
 161       case Op_ExtractUB:
 162         return false;
 163       default:
 164         return true;
 165     }
 166   }
 167 
 168 %}
 169 
// Cost constant shared by all SVE instruction patterns below.
definitions %{
  int_def SVE_COST             (200, 200);
%}
 173 
 174 
 175 
 176 
 177 // All SVE instructions
 178 
 179 // sve vector load/store
 180 
 181 // Use predicated vector load/store
// Whole-register vector load via predicated sve_ld1*, governed by an
// all-true predicate. Only selected for vectors of at least 16 bytes;
// smaller vectors are presumably matched by non-SVE rules -- TODO confirm.
instruct loadVA(vecA dst, vmemA mem) %{
  predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
  match(Set dst (LoadVector mem));
  ins_cost(SVE_COST);
  format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    // The element type picks the ld1b/ld1h/ld1w/ld1d variant inside
    // loadStoreA_predicate.
    loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
                         vector_element_basic_type(this), $mem->opcode(),
                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
 195 
// Whole-register vector store via predicated sve_st1*, governed by an
// all-true predicate; mirror of loadVA.
instruct storeVA(vecA src, vmemA mem) %{
  predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem src));
  ins_cost(SVE_COST);
  format %{ "sve_str $mem, $src\t # vector (sve)" %}
  ins_encode %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Element type of the stored value comes from the $src operand's def.
    loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
                         vector_element_basic_type(this, $src), $mem->opcode(),
                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
 209 
 210 
 211 // sve abs
 212 
// Vector absolute value, one pattern per element type. All use the
// predicated sve_abs/sve_fabs with an all-true predicate so every lane is
// processed. The length() lower bounds (16/8/4/2 lanes) restrict these
// patterns to vectors of at least 16 bytes for each element width.
instruct vabsAB(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsVB src));
  ins_cost(SVE_COST);
  format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_abs(as_FloatRegister($dst$$reg), __ B,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsAS(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsVS src));
  ins_cost(SVE_COST);
  format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_abs(as_FloatRegister($dst$$reg), __ H,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsAI(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsVI src));
  ins_cost(SVE_COST);
  format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_abs(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsAL(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsVL src));
  ins_cost(SVE_COST);
  format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_abs(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Floating-point variants use sve_fabs.
instruct vabsAF(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (AbsVF src));
  ins_cost(SVE_COST);
  format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_fabs(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsAD(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (AbsVD src));
  ins_cost(SVE_COST);
  format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_fabs(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 290 
 291 // sve add
 292 
// Vector add, one pattern per element type. These use the unpredicated
// three-operand sve_add/sve_fadd forms (no governing predicate needed, and
// dst may differ from both sources). The AddV* opcode fixes the element
// type, so no element_basic_type check is needed in the predicates.
instruct vaddAB(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_add(as_FloatRegister($dst$$reg), __ B,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddAS(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_add(as_FloatRegister($dst$$reg), __ H,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddAI(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_add(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddAL(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_add(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddAF(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddAD(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 370 
 371 // sve and
 372 
// Bitwise ops are element-type agnostic, so a single pattern per op matches
// any vector of at least 16 bytes, using the unpredicated bitwise forms.
instruct vandA(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (AndV src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_and  $dst, $src1, $src2\t# vector (sve)" %}
  ins_encode %{
    __ sve_and(as_FloatRegister($dst$$reg),
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// sve or

instruct vorA(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (OrV src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_orr  $dst, $src1, $src2\t# vector (sve)" %}
  ins_encode %{
    __ sve_orr(as_FloatRegister($dst$$reg),
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// sve xor

instruct vxorA(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (XorV src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_eor  $dst, $src1, $src2\t# vector (sve)" %}
  ins_encode %{
    __ sve_eor(as_FloatRegister($dst$$reg),
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 415 
 416 // sve float div
 417 
// Floating-point divide. The predicated sve_fdiv is destructive: dst must
// equal the first source, which the shared dst_src1 operand enforces.
instruct vdivAF(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (DivVF dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivAD(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (DivVD dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 441 
 442 // sve max
 443 
// Vector max/min: only float/double variants are provided (the predicates
// require T_FLOAT/T_DOUBLE). Predicated destructive forms: dst == src1.
instruct vmaxAF(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst_src1 (MaxV dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmaxAD(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst_src1 (MaxV dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmax $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fmax(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vminAF(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst_src1 (MinV dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vminAD(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst_src1 (MinV dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmin $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fmin(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 495 
 496 // sve fmla
 497 
 498 // dst_src1 = dst_src1 + src2 * src3
// Fused multiply-accumulate (requires UseFMA). Predicated destructive forms:
// the accumulator is dst_src1. fmls handles the negated-multiplicand shapes
// (either multiplicand may carry the NegV*).
// dst_src1 = dst_src1 + src2 * src3
instruct vfmlaAF(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vfmlaAD(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// sve fmls

// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsAF(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
  ins_cost(SVE_COST);
  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsAD(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
  ins_cost(SVE_COST);
  format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 555 
 556 // sve fnmla
 557 
 558 // dst_src1 = -dst_src1 + -src2 * src3
 559 // dst_src1 = -dst_src1 + src2 * -src3
// Negated fused multiply-accumulate forms (requires UseFMA): the accumulator
// dst_src1 appears negated in the matched Ideal shapes.
// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaAF(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
  ins_cost(SVE_COST);
  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaAD(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
  ins_cost(SVE_COST);
  format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// sve fnmls

// dst_src1 = -dst_src1 + src2 * src3
instruct vfnmlsAF(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = -dst_src1 + src2 * src3
instruct vfnmlsAD(vecA dst_src1, vecA src2, vecA src3) %{
  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 615 
 616 // sve mla
 617 
 618 // dst_src1 = dst_src1 + src2 * src3
// Integer multiply-accumulate/subtract, matched from AddV*/SubV* over MulV*.
// Predicated destructive forms: the accumulator is dst_src1.
// dst_src1 = dst_src1 + src2 * src3
instruct vmlaAB(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaAS(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaAI(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaAL(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// sve mls

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsAB(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsAS(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsAI(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsAL(vecA dst_src1, vecA src2, vecA src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 731 
 732 
 733 // sve mul
 734 
// Vector multiply. Integer variants use the predicated destructive sve_mul
// (dst == src1, enforced by dst_src1); the float/double variants use the
// unpredicated three-operand sve_fmul.
instruct vmulAB(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst_src1 (MulVB dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulAS(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst_src1 (MulVS dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulAI(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst_src1 (MulVI dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulAL(vecA dst_src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst_src1 (MulVL dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulAF(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulAD(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 808 
 809 // sve fneg
 810 
// Floating-point negate, predicated with an all-true predicate. The NegVF /
// NegVD opcode fixes the element type, so the predicates only check size.
instruct vnegAF(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (NegVF src));
  ins_cost(SVE_COST);
  format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegAD(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (NegVD src));
  ins_cost(SVE_COST);
  format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 834 
 835 // sve popcount vector
 836 
 837 instruct vpopcountAI(vecA dst, vecA src) %{
 838   predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
 839   match(Set dst (PopCountVI src));
 840   format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t"  %}
 841   ins_encode %{
 842      __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
 843   %}
 844   ins_pipe(pipe_slow);
 845 %}
 846 
 847 // sve add reduction
 848 
// Int add-reduction: dst = src1 + sum(lanes of src2).
// sve_uaddv folds the S lanes of src2 into tmp, umov moves lane 0 of tmp to
// a GP register, and addw adds the scalar input src1.  TEMP_DEF dst: dst is
// written (by umov) while src1 is still live, so it must not share a register
// with the inputs.
instruct reduce_addAI(iRegINoSp dst, iRegIorL2I src1, vecA src2, vRegD tmp) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "addw  $dst, $dst, $src1\t # add reduction S" %}
  ins_encode %{
    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $dst$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Long add-reduction: same shape as reduce_addAI, on D lanes with a 64-bit add.
instruct reduce_addAL(iRegLNoSp dst, iRegL src1, vecA src2, vRegD tmp) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t"
            "umov  $dst, $tmp, D, 0\n\t"
            "add  $dst, $dst, $src1\t # add reduction D" %}
  ins_encode %{
    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
    __ add($dst$$Register, $dst$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_slow);
%}
 884 
// Float add-reduction: sve_fadda accumulates the S lanes of src2 into the
// scalar src1_dst in order (strictly-ordered FP add), reading and writing the
// same register, hence the combined src1_dst operand.
instruct reduce_addAF(vRegF src1_dst, vecA src2) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
  match(Set src1_dst (AddReductionVF src1_dst src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Double add-reduction: same shape as reduce_addAF, on D lanes.
instruct reduce_addAD(vRegD src1_dst, vecA src2) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
  match(Set src1_dst (AddReductionVD src1_dst src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 908 
 909 // sve max reduction
 910 
// Float max-reduction: dst = max(src1, max of S lanes of src2).
// sve_fmaxv folds the vector into one lane of dst; fmaxs then combines it
// with the scalar input src1.  TEMP_DEF dst: dst is written before src1/src2
// are dead, so it must get its own register.
instruct reduce_maxAF(vRegF dst, vRegF src1, vecA src2) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
            "fmaxs $dst, $dst, $src1\t # max reduction F" %}
  ins_encode %{
    __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
    __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 926 
 927 instruct reduce_maxAD(vRegD dst, vRegD src1, vecA src2) %{
 928   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
 929             n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
 930   match(Set dst (MaxReductionV src1 src2));
 931   ins_cost(INSN_COST);
 932   effect(TEMP_DEF dst);
 933   format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
 934             "fmaxs $dst, $dst, $src1\t # max reduction D" %}
 935   ins_encode %{
 936     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
 937          ptrue, as_FloatRegister($src2$$reg));
 938     __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
 939   %}
 940   ins_pipe(pipe_slow);
 941 %}
 942 
 943 // sve min reduction
 944 
// Float min-reduction: dst = min(src1, min of S lanes of src2).
// Mirrors reduce_maxAF with sve_fminv/fmins.
instruct reduce_minAF(vRegF dst, vRegF src1, vecA src2) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
            "fmins $dst, $dst, $src1\t # min reduction F" %}
  ins_encode %{
    __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src2$$reg));
    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}
 960 
 961 instruct reduce_minAD(vRegD dst, vRegD src1, vecA src2) %{
 962   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
 963             n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
 964   match(Set dst (MinReductionV src1 src2));
 965   ins_cost(INSN_COST);
 966   effect(TEMP_DEF dst);
 967   format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
 968             "fmins $dst, $dst, $src1\t # min reduction D" %}
 969   ins_encode %{
 970     __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
 971          ptrue, as_FloatRegister($src2$$reg));
 972     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
 973   %}
 974   ins_pipe(pipe_slow);
 975 %}
 976 
 977 // sve vector Math.rint, floor, ceil
 978 
 979 instruct vroundAD(vecA dst, vecA src, immI rmode) %{
 980   predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
 981             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
 982   match(Set dst (RoundDoubleModeV src rmode));
 983   format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
 984   ins_encode %{
 985     switch ($rmode$$constant) {
 986       case RoundDoubleModeNode::rmode_rint:
 987         __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
 988              ptrue, as_FloatRegister($src$$reg));
 989         break;
 990       case RoundDoubleModeNode::rmode_floor:
 991         __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
 992              ptrue, as_FloatRegister($src$$reg));
 993         break;
 994       case RoundDoubleModeNode::rmode_ceil:
 995         __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
 996              ptrue, as_FloatRegister($src$$reg));
 997         break;
 998     }
 999   %}
1000   ins_pipe(pipe_slow);
1001 %}
1002 
1003 // sve replicate
1004 
// Replicate (broadcast) a GP-register scalar into every lane of an SVE
// vector via sve_dup.  One rule per lane size: B, H, S, D.

// Broadcast byte.
instruct replicateAB(vecA dst, iRegIorL2I src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (ReplicateB src));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $src\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast short (H lanes).
instruct replicateAS(vecA dst, iRegIorL2I src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (ReplicateS src));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $src\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast int (S lanes).
instruct replicateAI(vecA dst, iRegIorL2I src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (ReplicateI src));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast long (D lanes).
instruct replicateAL(vecA dst, iRegL src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (ReplicateL src));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1048 
1049 
// Replicate an immediate constant into every lane via sve_dup with an
// immediate operand.  The imm operand types (immI8, immI8_shift8,
// immL8_shift8) restrict the constant to what sve_dup can encode.

// Broadcast 8-bit immediate into B lanes.
instruct replicateAB_imm8(vecA dst, immI8 con) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (ReplicateB con));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $con\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast encodable immediate into H lanes.
instruct replicateAS_imm8(vecA dst, immI8_shift8 con) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (ReplicateS con));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $con\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast encodable immediate into S lanes.
instruct replicateAI_imm8(vecA dst, immI8_shift8 con) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (ReplicateI con));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $con\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast encodable immediate into D lanes.
instruct replicateAL_imm8(vecA dst, immL8_shift8 con) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (ReplicateL con));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $con\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}
1093 
1094 
// Replicate an FP scalar into every lane.  Uses sve_cpy (predicated copy of
// an FP register under ptrue) rather than sve_dup, since the source is a
// SIMD/FP register, not a GP register.

// Broadcast float (S lanes).
instruct replicateAF(vecA dst, vRegF src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (ReplicateF src));
  ins_cost(SVE_COST);
  format %{ "sve_cpy  $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast double (D lanes).
instruct replicateAD(vecA dst, vRegD src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (ReplicateD src));
  ins_cost(SVE_COST);
  format %{ "sve_cpy  $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_cpy(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1118 
1119 // sve shift
1120 
// Arithmetic (signed) right shift by a per-lane variable count held in a
// vector ("shift").  The predicated sve_asr form is destructive: dst is both
// first source and destination, which is why the rules match (RShiftVB dst
// shift) with dst repeated.

// B lanes.
instruct vasrAB(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (RShiftVB dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_asr(as_FloatRegister($dst$$reg), __ B,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// H lanes.
instruct vasrAS(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (RShiftVS dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_asr(as_FloatRegister($dst$$reg), __ H,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// S lanes.
instruct vasrAI(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (RShiftVI dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_asr(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// D lanes.
instruct vasrAL(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (RShiftVL dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_asr(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1168 
// Logical left shift by a per-lane variable count; destructive predicated
// sve_lsl form (dst is both first source and destination).

// B lanes.
instruct vlslAB(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (LShiftVB dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// H lanes.
instruct vlslAS(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (LShiftVS dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// S lanes.
instruct vlslAI(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (LShiftVI dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// D lanes.
instruct vlslAL(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (LShiftVL dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1216 
// Logical (unsigned) right shift by a per-lane variable count; destructive
// predicated sve_lsr form (dst is both first source and destination).

// B lanes.
instruct vlsrAB(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (URShiftVB dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// H lanes.
instruct vlsrAS(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (URShiftVS dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// S lanes.
instruct vlsrAI(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (URShiftVI dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// D lanes.
instruct vlsrAL(vecA dst, vecA shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (URShiftVL dst shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1264 
// Arithmetic right shift of B lanes by an immediate.
// con == 0: the immediate form cannot encode 0, so emit a register copy
// (sve_orr of src with itself).  con >= 8: an arithmetic shift by more than
// the lane width saturates to 7 (fills with the sign bit), matching Java
// semantics for byte shifts.
instruct vasrAB_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    if (con >= 8) con = 7;
    __ sve_asr(as_FloatRegister($dst$$reg), __ B,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1283 
// Arithmetic right shift of H lanes by an immediate.  Same scheme as
// vasrAB_imm: copy for con == 0, clamp to the lane width - 1 (15) for
// over-wide shifts.
instruct vasrAS_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    if (con >= 16) con = 15;
    __ sve_asr(as_FloatRegister($dst$$reg), __ H,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1302 
// Arithmetic right shift of S lanes by an immediate.  No clamp is needed
// here: int/long shift counts are masked earlier, so con is already in
// range -- presumably by the matcher/ideal graph; confirm if modifying.
instruct vasrAI_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_asr(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of D lanes by an immediate; same scheme as the
// S-lane rule above.
instruct vasrAL_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(SVE_COST);
  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_asr(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1338 
// Logical right shift of B lanes by an immediate.
// con == 0: emit a copy (immediate 0 is not encodable).  con >= 8: an
// unsigned shift by the full lane width or more yields zero, emitted as
// sve_eor of src with itself.
instruct vlsrAB_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    if (con >= 8) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1361 
// Logical right shift of H lanes by an immediate.
// con == 0: emit a copy (immediate 0 is not encodable).  con >= 16: an
// unsigned shift by the full 16-bit lane width or more yields zero.
instruct vlsrAS_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    // BUG FIX: H lanes are 16 bits wide, so only shifts >= 16 zero the lane.
    // The previous threshold of 8 (copied from the B-lane rule) wrongly
    // produced zero for shift counts 8..15.  The clamp in vasrAS_imm uses the
    // same 16-bit bound.
    if (con >= 16) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1384 
// Logical right shift of S lanes by an immediate.  con == 0 becomes a copy;
// no zeroing branch is needed because int shift counts are presumably
// already masked to lane width upstream -- confirm if modifying.
instruct vlsrAI_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of D lanes by an immediate; same scheme as the S-lane
// rule above.
instruct vlsrAL_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1420 
// Logical left shift of B lanes by an immediate.  con >= 8 (the lane width)
// shifts everything out, so the result is zeroed with sve_eor.  Unlike the
// right-shift rules there is no con == 0 copy branch here; sve_lsl is
// presumably able to encode a 0 left-shift immediate -- confirm against the
// assembler.
instruct vlslAB_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    if (con >= 8) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1438 
// Logical left shift of H lanes by an immediate.  Shifting by the full lane
// width or more zeroes the lane (sve_eor of src with itself).
instruct vlslAS_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    // BUG FIX: H lanes are 16 bits wide, so only shifts >= 16 zero the lane.
    // The previous threshold of 8 (copied from the B-lane rule) wrongly
    // produced zero for shift counts 8..15.  The clamp in vasrAS_imm uses the
    // same 16-bit bound.
    if (con >= 16) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }
    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1456 
// Logical left shift of S lanes by an immediate; no zeroing branch needed
// since int shift counts are presumably masked upstream -- confirm if
// modifying.
instruct vlslAI_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of D lanes by an immediate; same scheme as the S-lane
// rule above.
instruct vlslAL_imm(vecA dst, vecA src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(SVE_COST);
  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}
1482 
// Materialize a vector shift count: broadcast the GP-register count into
// every lane with sve_dup.  One rule per element type, selected on the
// vector's element_basic_type; each rule serves both LShiftCntV and
// RShiftCntV.

// B lanes (byte vectors).
instruct vshiftcntAB(vecA dst, iRegIorL2I cnt) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// H lanes (short or char vectors).
instruct vshiftcntAS(vecA dst, iRegIorL2I cnt) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
            (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// S lanes (int vectors).
instruct vshiftcntAI(vecA dst, iRegIorL2I cnt) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// D lanes (long vectors).
instruct vshiftcntAL(vecA dst, iRegIorL2I cnt) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
  ins_encode %{
    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1531 
1532 // sve sqrt
1533 
// Vector square root, float (S) lanes, predicated with ptrue.
instruct vsqrtAF(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (SqrtVF src));
  ins_cost(SVE_COST);
  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
  ins_encode %{
    __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Vector square root, double (D) lanes.
instruct vsqrtAD(vecA dst, vecA src) %{
  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
  match(Set dst (SqrtVD src));
  ins_cost(SVE_COST);
  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
  ins_encode %{
    __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1557 
1558 // sve sub
1559 
// Integer vector subtraction, unpredicated three-register sve_sub:
// dst = src1 - src2, one rule per lane size.

// B lanes.
instruct vsubAB(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_sub(as_FloatRegister($dst$$reg), __ B,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// H lanes.
instruct vsubAS(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_sub(as_FloatRegister($dst$$reg), __ H,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// S lanes.
instruct vsubAI(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_sub(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// D lanes.
instruct vsubAL(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_sub(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1611 
// Floating-point vector subtraction, unpredicated three-register sve_fsub:
// dst = src1 - src2.

// Float (S) lanes.
instruct vsubAF(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_fsub(as_FloatRegister($dst$$reg), __ S,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Double (D) lanes.
instruct vsubAD(vecA dst, vecA src1, vecA src2) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_fsub(as_FloatRegister($dst$$reg), __ D,
         as_FloatRegister($src1$$reg),
         as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1637