src/hotspot/cpu/aarch64/aarch64.ad (old version)

 1153 
 1154 // Class for 128 bit register v30
 1155 reg_class v30_reg(
 1156     V30, V30_H
 1157 );
 1158 
 1159 // Class for 128 bit register v31
 1160 reg_class v31_reg(
 1161     V31, V31_H
 1162 );
 1163 
 1164 // Class for all SVE predicate registers.
 1165 reg_class pr_reg (
 1166     P0,
 1167     P1,
 1168     P2,
 1169     P3,
 1170     P4,
 1171     P5,
 1172     P6,
 1173     P7,
 1174     P8,
 1175     P9,
 1176     P10,
 1177     P11,
 1178     P12,
 1179     P13,
 1180     P14,
 1181     P15
 1182 );
 1183 
 1184 // Class for SVE governing predicate registers, which are used
 1185 // to determine the active elements of a predicated instruction.
 1186 reg_class gov_pr (
 1187     P0,
 1188     P1,
 1189     P2,
 1190     P3,
 1191     P4,
 1192     P5,
 1193     P6,
 1194     P7
 1195 );
 1196 
 1197 // Singleton class for condition codes
 1198 reg_class int_flags(RFLAGS);
 1199 
 1200 %}
 1201 
 1202 //----------DEFINITION BLOCK---------------------------------------------------
 1203 // Define name --> value mappings to inform the ADLC of an integer valued name
 1204 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 1205 // Format:
 1206 //        int_def  <name>         ( <int_value>, <expression>);
 1207 // Generated Code in ad_<arch>.hpp
 1208 //        #define  <name>   (<expression>)
 1209 //        // value == <int_value>
 1210 // Generated code in ad_<arch>.cpp adlc_verification()
 1211 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 1212 //
 1213 
 1214 // we follow the ppc-aix port in using a simple cost model which ranks

 1881 
 1882   // n.b. frame size includes space for return pc and rfp
 1883   const int framesize = C->output()->frame_size_in_bytes();
 1884   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
 1885 
 1886   // insert a nop at the start of the prolog so we can patch in a
 1887   // branch if we need to invalidate the method later
 1888   __ nop();
 1889 
 1890   if (C->clinit_barrier_on_entry()) {
 1891     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1892 
 1893     Label L_skip_barrier;
 1894 
 1895     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1896     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1897     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   int bangsize = C->output()->bang_size_in_bytes();
 1902   if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
 1903     __ generate_stack_overflow_check(bangsize);
 1904 
 1905   __ build_frame(framesize);
 1906 
 1907   if (C->stub_function() == NULL) {
 1908     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1909     bs->nmethod_entry_barrier(&_masm);
 1910   }
 1911 
 1912   if (VerifyStackAtCalls) {
 1913     Unimplemented();
 1914   }
 1915 
 1916   C->output()->set_frame_complete(cbuf.insts_size());
 1917 
 1918   if (C->has_mach_constant_base_node()) {
 1919     // NOTE: We set the table base offset here because users might be
 1920     // emitted before MachConstantBaseNode.

 2045   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 2046 
 2047   if (src_hi != OptoReg::Bad) {
 2048     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 2049            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 2050            "expected aligned-adjacent pairs");
 2051   }
 2052 
 2053   if (src_lo == dst_lo && src_hi == dst_hi) {
 2054     return 0;            // Self copy, no move.
 2055   }
 2056 
 2057   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
 2058               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
 2059   int src_offset = ra_->reg2offset(src_lo);
 2060   int dst_offset = ra_->reg2offset(dst_lo);
 2061 
 2062   if (bottom_type()->isa_vect() != NULL) {
 2063     uint ireg = ideal_reg();
 2064     if (ireg == Op_VecA && cbuf) {
 2065       Unimplemented();
 2066     } else if (cbuf) {
 2067       assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
 2068       C2_MacroAssembler _masm(cbuf);
 2069       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
 2070       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 2071         // stack->stack
 2072         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
 2073         if (ireg == Op_VecD) {
 2074           __ unspill(rscratch1, true, src_offset);
 2075           __ spill(rscratch1, true, dst_offset);
 2076         } else {
 2077           __ spill_copy128(src_offset, dst_offset);
 2078         }
 2079       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 2080         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
 2081                ireg == Op_VecD ? __ T8B : __ T16B,
 2082                as_FloatRegister(Matcher::_regEncode[src_lo]));
 2083       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 2084         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
 2085                  ireg == Op_VecD ? __ D : __ Q,

 2338   if (!has_match_rule(opcode))
 2339     return false;
 2340 
 2341   bool ret_value = true;
 2342   switch (opcode) {
 2343     case Op_CacheWB:
 2344     case Op_CacheWBPreSync:
 2345     case Op_CacheWBPostSync:
 2346       if (!VM_Version::supports_data_cache_line_flush()) {
 2347         ret_value = false;
 2348       }
 2349       break;
 2350   }
 2351 
 2352   return ret_value; // Per default match rules are supported.
 2353 }
 2354 
 2355 // Identify extra cases where we might want to provide match rules for vector nodes and
 2356 // other intrinsics guarded by vector length (vlen) and element type (bt).
 2357 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2358   if (!match_rule_supported(opcode)) {
 2359     return false;
 2360   }
 2361 
 2362   // Special cases which require vector length
 2363   switch (opcode) {
 2364     case Op_MulAddVS2VI: {
 2365       if (vlen != 4) {
 2366         return false;
 2367       }
 2368       break;
 2369     }
 2370   }
 2371 
 2372   return true; // Per default match rules are supported.
 2373 }
 2374 
 2375 const bool Matcher::has_predicated_vectors(void) {
 2376   return UseSVE > 0;
 2377 }
 2378 
 2379 const int Matcher::float_pressure(int default_pressure_threshold) {
 2380   return default_pressure_threshold;
 2381 }
 2382 
 2383 int Matcher::regnum_to_fpu_offset(int regnum)
 2384 {
 2385   Unimplemented();
 2386   return 0;
 2387 }
 2388 
 2389 // Is this branch offset short enough that a short branch can be used?
 2390 //
 2391 // NOTE: If the platform does not provide any short branch variants, then
 2392 //       this method should return false for offset 0.
 2393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2394   // The passed offset is relative to address of the branch.
 2395 
 2396   return (-32768 <= offset && offset < 32768);
 2397 }
 2398 
 2399 const bool Matcher::isSimpleConstant64(jlong value) {
 2400   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
 2401   // Probably always true, even if a temp register is required.
 2402   return true;
 2403 }
 2404 
 2405 // true just means we have fast l2f conversion
 2406 const bool Matcher::convL2FSupported(void) {
 2407   return true;
 2408 }
 2409 
 2410 // Vector width in bytes.
 2411 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2412   int size = MIN2(16, (int)MaxVectorSize);

 2413   // Minimum 2 values in vector
 2414   if (size < 2*type2aelembytes(bt)) size = 0;
 2415   // But never < 4
 2416   if (size < 4) size = 0;
 2417   return size;
 2418 }
 2419 
 2420 // Limits on vector size (number of elements) loaded into vector.
 2421 const int Matcher::max_vector_size(const BasicType bt) {
 2422   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2423 }
 2424 const int Matcher::min_vector_size(const BasicType bt) {
 2425   int max_size = max_vector_size(bt);
 2426   if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
 2427     // Currently, vector lengths less than the SVE vector register size are not supported.
 2428     return max_size;
 2429   } else {
 2430     //  For the moment limit the vector size to 8 bytes with NEON.
 2431     int size = 8 / type2aelembytes(bt);
 2432     if (size < 2) size = 2;

 3699     address call;
 3700     if (!_method) {
 3701       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3702       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
 3703     } else {
 3704       int method_index = resolved_method_index(cbuf);
 3705       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3706                                                   : static_call_Relocation::spec(method_index);
 3707       call = __ trampoline_call(Address(addr, rspec), &cbuf);
 3708 
 3709       // Emit stub for static call
 3710       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3711       if (stub == NULL) {
 3712         ciEnv::current()->record_failure("CodeCache is full");
 3713         return;
 3714       }
 3715     }
 3716     if (call == NULL) {
 3717       ciEnv::current()->record_failure("CodeCache is full");
 3718       return;

 3719     }
 3720   %}
 3721 
 3722   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3723     C2_MacroAssembler _masm(&cbuf);
 3724     int method_index = resolved_method_index(cbuf);
 3725     address call = __ ic_call((address)$meth$$method, method_index);
 3726     if (call == NULL) {
 3727       ciEnv::current()->record_failure("CodeCache is full");
 3728       return;
 3729     }
 3730   %}
 3731 
 3732   enc_class aarch64_enc_call_epilog() %{
 3733     C2_MacroAssembler _masm(&cbuf);
 3734     if (VerifyStackAtCalls) {
 3735       // Check that stack depth is unchanged: find majik cookie on stack
 3736       __ call_Unimplemented();
 3737     }
 3738   %}
 3739 
 3740   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3741     C2_MacroAssembler _masm(&cbuf);
 3742 
 3743     // some calls to generated routines (arraycopy code) are scheduled
 3744     // by C2 as runtime calls. if so we can call them using a br (they
 3745     // will be in a reachable segment) otherwise we have to use a blr
 3746     // which loads the absolute address into a register.
 3747     address entry = (address)$meth$$method;
 3748     CodeBlob *cb = CodeCache::find_blob(entry);
 3749     if (cb) {
 3750       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3751       if (call == NULL) {
 3752         ciEnv::current()->record_failure("CodeCache is full");
 3753         return;
 3754       }
 3755     } else {
 3756       Label retaddr;
 3757       __ adr(rscratch2, retaddr);
 3758       __ lea(rscratch1, RuntimeAddress(entry));
 3759       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
 3760       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
 3761       __ blr(rscratch1);
 3762       __ bind(retaddr);
 3763       __ add(sp, sp, 2 * wordSize);
 3764     }
 3765   %}
 3766 
 3767   enc_class aarch64_enc_rethrow() %{
 3768     C2_MacroAssembler _masm(&cbuf);
 3769     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
 3770   %}
 3771 
 3772   enc_class aarch64_enc_ret() %{
 3773     C2_MacroAssembler _masm(&cbuf);

 3774     __ ret(lr);
 3775   %}
 3776 
 3777   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
 3778     C2_MacroAssembler _masm(&cbuf);
 3779     Register target_reg = as_Register($jump_target$$reg);
 3780     __ br(target_reg);
 3781   %}
 3782 
 3783   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
 3784     C2_MacroAssembler _masm(&cbuf);
 3785     Register target_reg = as_Register($jump_target$$reg);
 3786     // exception oop should be in r0
 3787     // ret addr has been popped into lr
 3788     // callee expects it in r3
 3789     __ mov(r3, lr);
 3790     __ br(target_reg);
 3791   %}
 3792 
 3793   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{

 4517 operand immLoffset8()
 4518 %{
 4519   predicate(Address::offset_ok_for_immed(n->get_long(), 3));
 4520   match(ConL);
 4521 
 4522   op_cost(0);
 4523   format %{ %}
 4524   interface(CONST_INTER);
 4525 %}
 4526 
 4527 operand immLoffset16()
 4528 %{
 4529   predicate(Address::offset_ok_for_immed(n->get_long(), 4));
 4530   match(ConL);
 4531 
 4532   op_cost(0);
 4533   format %{ %}
 4534   interface(CONST_INTER);
 4535 %}
 4536 
 4537 // 32 bit integer valid for add sub immediate
 4538 operand immIAddSub()
 4539 %{
 4540   predicate(Assembler::operand_valid_for_add_sub_immediate((int64_t)n->get_int()));
 4541   match(ConI);
 4542   op_cost(0);
 4543   format %{ %}
 4544   interface(CONST_INTER);
 4545 %}
 4546 
 4547 // 32 bit unsigned integer valid for logical immediate
 4548 // TODO -- check this is right when e.g the mask is 0x80000000
 4549 operand immILog()
 4550 %{
 4551   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (uint64_t)n->get_int()));
 4552   match(ConI);
 4553 
 4554   op_cost(0);
 4555   format %{ %}
 4556   interface(CONST_INTER);

16385   ins_cost(4 * INSN_COST);
16386   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
16387   ins_encode( aarch64_enc_ldrvS(dst, mem) );
16388   ins_pipe(vload_reg_mem64);
16389 %}
16390 
16391 // Load vector (64 bits)
16392 instruct loadV8(vecD dst, vmem8 mem)
16393 %{
16394   predicate(n->as_LoadVector()->memory_size() == 8);
16395   match(Set dst (LoadVector mem));
16396   ins_cost(4 * INSN_COST);
16397   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
16398   ins_encode( aarch64_enc_ldrvD(dst, mem) );
16399   ins_pipe(vload_reg_mem64);
16400 %}
16401 
16402 // Load Vector (128 bits)
16403 instruct loadV16(vecX dst, vmem16 mem)
16404 %{
16405   predicate(n->as_LoadVector()->memory_size() == 16);
16406   match(Set dst (LoadVector mem));
16407   ins_cost(4 * INSN_COST);
16408   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
16409   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16410   ins_pipe(vload_reg_mem128);
16411 %}
16412 
16413 // Store Vector (32 bits)
16414 instruct storeV4(vecD src, vmem4 mem)
16415 %{
16416   predicate(n->as_StoreVector()->memory_size() == 4);
16417   match(Set mem (StoreVector mem src));
16418   ins_cost(4 * INSN_COST);
16419   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
16420   ins_encode( aarch64_enc_strvS(src, mem) );
16421   ins_pipe(vstore_reg_mem64);
16422 %}
16423 
16424 // Store Vector (64 bits)
16425 instruct storeV8(vecD src, vmem8 mem)

16441   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
16442   ins_encode( aarch64_enc_strvQ(src, mem) );
16443   ins_pipe(vstore_reg_mem128);
16444 %}
16445 
16446 instruct replicate8B(vecD dst, iRegIorL2I src)
16447 %{
16448   predicate(n->as_Vector()->length() == 4 ||
16449             n->as_Vector()->length() == 8);
16450   match(Set dst (ReplicateB src));
16451   ins_cost(INSN_COST);
16452   format %{ "dup  $dst, $src\t# vector (8B)" %}
16453   ins_encode %{
16454     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
16455   %}
16456   ins_pipe(vdup_reg_reg64);
16457 %}
16458 
16459 instruct replicate16B(vecX dst, iRegIorL2I src)
16460 %{
16461   predicate(n->as_Vector()->length() == 16);
16462   match(Set dst (ReplicateB src));
16463   ins_cost(INSN_COST);
16464   format %{ "dup  $dst, $src\t# vector (16B)" %}
16465   ins_encode %{
16466     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
16467   %}
16468   ins_pipe(vdup_reg_reg128);
16469 %}
16470 
16471 instruct replicate8B_imm(vecD dst, immI con)
16472 %{
16473   predicate(n->as_Vector()->length() == 4 ||
16474             n->as_Vector()->length() == 8);
16475   match(Set dst (ReplicateB con));
16476   ins_cost(INSN_COST);
16477   format %{ "movi  $dst, $con\t# vector(8B)" %}
16478   ins_encode %{
16479     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
16480   %}
16481   ins_pipe(vmovi_reg_imm64);
16482 %}
16483 
16484 instruct replicate16B_imm(vecX dst, immI con)
16485 %{
16486   predicate(n->as_Vector()->length() == 16);
16487   match(Set dst (ReplicateB con));
16488   ins_cost(INSN_COST);
16489   format %{ "movi  $dst, $con\t# vector(16B)" %}
16490   ins_encode %{
16491     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
16492   %}
16493   ins_pipe(vmovi_reg_imm128);
16494 %}
16495 
16496 instruct replicate4S(vecD dst, iRegIorL2I src)
16497 %{
16498   predicate(n->as_Vector()->length() == 2 ||
16499             n->as_Vector()->length() == 4);
16500   match(Set dst (ReplicateS src));
16501   ins_cost(INSN_COST);
16502   format %{ "dup  $dst, $src\t# vector (4S)" %}
16503   ins_encode %{
16504     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
16505   %}
16506   ins_pipe(vdup_reg_reg64);
16507 %}
16508 
16509 instruct replicate8S(vecX dst, iRegIorL2I src)
16510 %{
16511   predicate(n->as_Vector()->length() == 8);
16512   match(Set dst (ReplicateS src));
16513   ins_cost(INSN_COST);
16514   format %{ "dup  $dst, $src\t# vector (8S)" %}
16515   ins_encode %{
16516     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
16517   %}
16518   ins_pipe(vdup_reg_reg128);
16519 %}
16520 
16521 instruct replicate4S_imm(vecD dst, immI con)
16522 %{
16523   predicate(n->as_Vector()->length() == 2 ||
16524             n->as_Vector()->length() == 4);
16525   match(Set dst (ReplicateS con));
16526   ins_cost(INSN_COST);
16527   format %{ "movi  $dst, $con\t# vector(4H)" %}
16528   ins_encode %{
16529     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
16530   %}
16531   ins_pipe(vmovi_reg_imm64);
16532 %}
16533 
16534 instruct replicate8S_imm(vecX dst, immI con)
16535 %{
16536   predicate(n->as_Vector()->length() == 8);
16537   match(Set dst (ReplicateS con));
16538   ins_cost(INSN_COST);
16539   format %{ "movi  $dst, $con\t# vector(8H)" %}
16540   ins_encode %{
16541     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16542   %}
16543   ins_pipe(vmovi_reg_imm128);
16544 %}
16545 
16546 instruct replicate2I(vecD dst, iRegIorL2I src)
16547 %{
16548   predicate(n->as_Vector()->length() == 2);
16549   match(Set dst (ReplicateI src));
16550   ins_cost(INSN_COST);
16551   format %{ "dup  $dst, $src\t# vector (2I)" %}
16552   ins_encode %{
16553     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16554   %}
16555   ins_pipe(vdup_reg_reg64);
16556 %}
16557 
16558 instruct replicate4I(vecX dst, iRegIorL2I src)
16559 %{
16560   predicate(n->as_Vector()->length() == 4);
16561   match(Set dst (ReplicateI src));
16562   ins_cost(INSN_COST);
16563   format %{ "dup  $dst, $src\t# vector (4I)" %}
16564   ins_encode %{
16565     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16566   %}
16567   ins_pipe(vdup_reg_reg128);
16568 %}
16569 
16570 instruct replicate2I_imm(vecD dst, immI con)
16571 %{
16572   predicate(n->as_Vector()->length() == 2);
16573   match(Set dst (ReplicateI con));
16574   ins_cost(INSN_COST);
16575   format %{ "movi  $dst, $con\t# vector(2I)" %}
16576   ins_encode %{
16577     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16578   %}
16579   ins_pipe(vmovi_reg_imm64);
16580 %}
16581 
16582 instruct replicate4I_imm(vecX dst, immI con)
16583 %{
16584   predicate(n->as_Vector()->length() == 4);
16585   match(Set dst (ReplicateI con));
16586   ins_cost(INSN_COST);
16587   format %{ "movi  $dst, $con\t# vector(4I)" %}
16588   ins_encode %{
16589     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16590   %}
16591   ins_pipe(vmovi_reg_imm128);
16592 %}
16593 
16594 instruct replicate2L(vecX dst, iRegL src)
16595 %{
16596   predicate(n->as_Vector()->length() == 2);
16597   match(Set dst (ReplicateL src));
16598   ins_cost(INSN_COST);
16599   format %{ "dup  $dst, $src\t# vector (2L)" %}
16600   ins_encode %{
16601     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16602   %}
16603   ins_pipe(vdup_reg_reg128);
16604 %}
16605 
16606 instruct replicate2L_zero(vecX dst, immI0 zero)
16607 %{
16608   predicate(n->as_Vector()->length() == 2);
16609   match(Set dst (ReplicateI zero));
16610   ins_cost(INSN_COST);
16611   format %{ "movi  $dst, $zero\t# vector(4I)" %}
16612   ins_encode %{
16613     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16614            as_FloatRegister($dst$$reg),
16615            as_FloatRegister($dst$$reg));
16616   %}
16617   ins_pipe(vmovi_reg_imm128);
16618 %}
16619 
16620 instruct replicate2F(vecD dst, vRegF src)
16621 %{
16622   predicate(n->as_Vector()->length() == 2);
16623   match(Set dst (ReplicateF src));
16624   ins_cost(INSN_COST);
16625   format %{ "dup  $dst, $src\t# vector (2F)" %}
16626   ins_encode %{
16627     __ dup(as_FloatRegister($dst$$reg), __ T2S,
16628            as_FloatRegister($src$$reg));
16629   %}
16630   ins_pipe(vdup_reg_freg64);
16631 %}
16632 
16633 instruct replicate4F(vecX dst, vRegF src)
16634 %{
16635   predicate(n->as_Vector()->length() == 4);
16636   match(Set dst (ReplicateF src));
16637   ins_cost(INSN_COST);
16638   format %{ "dup  $dst, $src\t# vector (4F)" %}
16639   ins_encode %{
16640     __ dup(as_FloatRegister($dst$$reg), __ T4S,
16641            as_FloatRegister($src$$reg));
16642   %}
16643   ins_pipe(vdup_reg_freg128);
16644 %}
16645 
16646 instruct replicate2D(vecX dst, vRegD src)
16647 %{
16648   predicate(n->as_Vector()->length() == 2);
16649   match(Set dst (ReplicateD src));
16650   ins_cost(INSN_COST);
16651   format %{ "dup  $dst, $src\t# vector (2D)" %}
16652   ins_encode %{
16653     __ dup(as_FloatRegister($dst$$reg), __ T2D,
16654            as_FloatRegister($src$$reg));
16655   %}
16656   ins_pipe(vdup_reg_dreg128);
16657 %}
16658 
16659 // ====================REDUCTION ARITHMETIC====================================
16660 
16661 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
16662 %{
16663   match(Set dst (AddReductionVI isrc vsrc));
16664   ins_cost(INSN_COST);
16665   effect(TEMP tmp, TEMP tmp2);
16666   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
16667             "umov  $tmp2, $vsrc, S, 1\n\t"
16668             "addw  $tmp, $isrc, $tmp\n\t"

src/hotspot/cpu/aarch64/aarch64.ad (new version: the same ranges with the changes applied)

 1153 
 1154 // Class for 128 bit register v30
 1155 reg_class v30_reg(
 1156     V30, V30_H
 1157 );
 1158 
 1159 // Class for 128 bit register v31
 1160 reg_class v31_reg(
 1161     V31, V31_H
 1162 );
 1163 
 1164 // Class for all SVE predicate registers.
 1165 reg_class pr_reg (
 1166     P0,
 1167     P1,
 1168     P2,
 1169     P3,
 1170     P4,
 1171     P5,
 1172     P6,
 1173     // P7, non-allocatable, preserved with all elements preset to TRUE.
 1174     P8,
 1175     P9,
 1176     P10,
 1177     P11,
 1178     P12,
 1179     P13,
 1180     P14,
 1181     P15
 1182 );
 1183 
 1184 // Class for SVE governing predicate registers, which are used
 1185 // to determine the active elements of a predicated instruction.
 1186 reg_class gov_pr (
 1187     P0,
 1188     P1,
 1189     P2,
 1190     P3,
 1191     P4,
 1192     P5,
 1193     P6,
 1194     // P7, non-allocatable, preserved with all elements preset to TRUE.
 1195 );
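
A governing predicate supplies a per-lane mask: a predicated SVE instruction
reads or writes only the lanes whose predicate bit is set. A scalar model of
merging predication (an illustrative sketch, not code from this change):

    // Active lanes are updated; inactive lanes keep their old value, which
    // is what the "/m" (merging) qualifier means in SVE assembly.
    void predicated_add(int* z0, const int* z1, const bool* p, int vl) {
      for (int i = 0; i < vl; i++) {
        if (p[i]) {
          z0[i] += z1[i];
        }
      }
    }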
 1196 
 1197 // Singleton class for condition codes
 1198 reg_class int_flags(RFLAGS);
 1199 
 1200 %}
 1201 
 1202 //----------DEFINITION BLOCK---------------------------------------------------
 1203 // Define name --> value mappings to inform the ADLC of an integer valued name
 1204 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 1205 // Format:
 1206 //        int_def  <name>         ( <int_value>, <expression>);
 1207 // Generated Code in ad_<arch>.hpp
 1208 //        #define  <name>   (<expression>)
 1209 //        // value == <int_value>
 1210 // Generated code in ad_<arch>.cpp adlc_verification()
 1211 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 1212 //
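
As a concrete instance of the format described above, the cost-model section
introduced by the comment just below defines names along these lines (a
representative example; the exact definitions sit in a range this excerpt
elides):

    int_def INSN_COST    ( 100, 100);

    // for which ADLC generates, in ad_aarch64.hpp:
    //   #define INSN_COST (100)
    // plus the corresponding assert in ad_aarch64.cpp's adlc_verification().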
 1213 
 1214 // we follow the ppc-aix port in using a simple cost model which ranks

 1881 
 1882   // n.b. frame size includes space for return pc and rfp
 1883   const int framesize = C->output()->frame_size_in_bytes();
 1884   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
 1885 
 1886   // insert a nop at the start of the prolog so we can patch in a
 1887   // branch if we need to invalidate the method later
 1888   __ nop();
 1889 
 1890   if (C->clinit_barrier_on_entry()) {
 1891     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1892 
 1893     Label L_skip_barrier;
 1894 
 1895     __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
 1896     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 1897     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
 1901   if (UseSVE > 0 && C->max_vector_size() >= 16) {
 1902     __ reinitialize_ptrue();
 1903   }
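
C2-compiled SVE code treats p7 as a permanently all-true predicate (see the
"non-allocatable, preserved with all elements preset to TRUE" note in the
register classes above). An arbitrary caller gives no such guarantee, so the
prolog re-establishes the invariant. A plausible sketch of the helper (the
real definition lives in the AArch64 macro assembler; the names here are
assumed from its conventions):

    void MacroAssembler::reinitialize_ptrue() {
      // ptrue p7.b -- set every byte-granule lane of p7 to true.
      // ("ptrue" as a register alias and sve_ptrue() are assumed here.)
      sve_ptrue(ptrue, B);
    }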
 1904 
 1905   int bangsize = C->output()->bang_size_in_bytes();
 1906   if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
 1907     __ generate_stack_overflow_check(bangsize);
 1908 
 1909   __ build_frame(framesize);
 1910 
 1911   if (C->stub_function() == NULL) {
 1912     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1913     bs->nmethod_entry_barrier(&_masm);
 1914   }
 1915 
 1916   if (VerifyStackAtCalls) {
 1917     Unimplemented();
 1918   }
 1919 
 1920   C->output()->set_frame_complete(cbuf.insts_size());
 1921 
 1922   if (C->has_mach_constant_base_node()) {
 1923     // NOTE: We set the table base offset here because users might be
 1924     // emitted before MachConstantBaseNode.

 2049   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 2050 
 2051   if (src_hi != OptoReg::Bad) {
 2052     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 2053            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 2054            "expected aligned-adjacent pairs");
 2055   }
 2056 
 2057   if (src_lo == dst_lo && src_hi == dst_hi) {
 2058     return 0;            // Self copy, no move.
 2059   }
 2060 
 2061   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
 2062               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
 2063   int src_offset = ra_->reg2offset(src_lo);
 2064   int dst_offset = ra_->reg2offset(dst_lo);
 2065 
 2066   if (bottom_type()->isa_vect() != NULL) {
 2067     uint ireg = ideal_reg();
 2068     if (ireg == Op_VecA && cbuf) {
 2069       C2_MacroAssembler _masm(cbuf);
 2070       int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
 2071       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 2072         // stack->stack
 2073         __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
 2074                                                 sve_vector_reg_size_in_bytes);
 2075       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 2076         __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
 2077                             sve_vector_reg_size_in_bytes);
 2078       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
 2079         __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
 2080                               sve_vector_reg_size_in_bytes);
 2081       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
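        // register->register: SVE has no separate whole-register vector move
        // encoding; MOV Zd, Zn is an alias of ORR Zd, Zn, Zn, hence sve_orr
        // with the source register repeated.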
 2082         __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
 2083                    as_FloatRegister(Matcher::_regEncode[src_lo]),
 2084                    as_FloatRegister(Matcher::_regEncode[src_lo]));
 2085       } else {
 2086         ShouldNotReachHere();
 2087       }
 2088     } else if (cbuf) {
 2089       assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
 2090       C2_MacroAssembler _masm(cbuf);
 2091       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
 2092       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 2093         // stack->stack
 2094         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
 2095         if (ireg == Op_VecD) {
 2096           __ unspill(rscratch1, true, src_offset);
 2097           __ spill(rscratch1, true, dst_offset);
 2098         } else {
 2099           __ spill_copy128(src_offset, dst_offset);
 2100         }
 2101       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 2102         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
 2103                ireg == Op_VecD ? __ T8B : __ T16B,
 2104                as_FloatRegister(Matcher::_regEncode[src_lo]));
 2105       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 2106         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
 2107                  ireg == Op_VecD ? __ D : __ Q,

 2360   if (!has_match_rule(opcode))
 2361     return false;
 2362 
 2363   bool ret_value = true;
 2364   switch (opcode) {
 2365     case Op_CacheWB:
 2366     case Op_CacheWBPreSync:
 2367     case Op_CacheWBPostSync:
 2368       if (!VM_Version::supports_data_cache_line_flush()) {
 2369         ret_value = false;
 2370       }
 2371       break;
 2372   }
 2373 
 2374   return ret_value; // Per default match rules are supported.
 2375 }
 2376 
 2377 // Identify extra cases where we might want to provide match rules for vector nodes and
 2378 // other intrinsics guarded by vector length (vlen) and element type (bt).
 2379 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2380   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2381     return false;
 2382   }
 2383   int bit_size = vlen * type2aelembytes(bt) * 8;
 2384   if (UseSVE == 0 && bit_size > 128) {
 2385     return false;
 2386   }
 2387   if (UseSVE > 0) {
 2388     return op_sve_supported(opcode);
 2389   } else { // NEON
 2390     // Special cases
 2391     switch (opcode) {
 2392     case Op_MulAddVS2VI:
 2393       if (bit_size < 128) {
 2394         return false;
 2395       }
 2396       break;
 2397     case Op_MulVL:
 2398       return false;
 2399     default:
 2400       break;
 2401     }
 2402   }
 2403   return true; // Per default match rules are supported.
 2404 }
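
A worked instance of the gating above (a standalone sketch; the numbers mirror
the checks in the function, and T_SHORT elements are 2 bytes each):

    #include <cassert>
    int main() {
      // Op_MulAddVS2VI with vlen = 8 T_SHORT lanes:
      // 8 lanes * 2 bytes * 8 bits = 128 bits, so the NEON branch accepts it.
      assert(8 * 2 * 8 == 128);
      // vlen = 4 gives 64 bits, which the bit_size < 128 check rejects.
      assert(4 * 2 * 8 < 128);
      return 0;
    }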
 2405 
 2406 const bool Matcher::has_predicated_vectors(void) {
 2407   return UseSVE > 0;
 2408 }
 2409 
 2410 const int Matcher::float_pressure(int default_pressure_threshold) {
 2411   return default_pressure_threshold;
 2412 }
 2413 
 2414 int Matcher::regnum_to_fpu_offset(int regnum)
 2415 {
 2416   Unimplemented();
 2417   return 0;
 2418 }
 2419 
 2420 // Is this branch offset short enough that a short branch can be used?
 2421 //
 2422 // NOTE: If the platform does not provide any short branch variants, then
 2423 //       this method should return false for offset 0.
 2424 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2425   // The passed offset is relative to address of the branch.
 2426 
 2427   return (-32768 <= offset && offset < 32768);
 2428 }
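
(The +/-32768-byte window matches the most restrictive AArch64 short-branch
form, tbz/tbnz, whose 14-bit signed immediate spans +/-8192 instructions of
4 bytes each; this is a property of the instruction encoding, not of this
change.)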
 2429 
 2430 const bool Matcher::isSimpleConstant64(jlong value) {
 2431   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
 2432   // Probably always true, even if a temp register is required.
 2433   return true;
 2434 }
 2435 
 2436 // true just means we have fast l2f conversion
 2437 const bool Matcher::convL2FSupported(void) {
 2438   return true;
 2439 }
 2440 
 2441 // Vector width in bytes.
 2442 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2443   // The MaxVectorSize should have been set by detecting SVE max vector register size.
 2444   int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
 2445   // Minimum 2 values in vector
 2446   if (size < 2*type2aelembytes(bt)) size = 0;
 2447   // But never < 4
 2448   if (size < 4) size = 0;
 2449   return size;
 2450 }
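
For example, with T_INT elements (4 bytes each) and MaxVectorSize = 64, NEON
still caps the width at 16 bytes while SVE can use the full 64. A standalone
sketch of the same arithmetic:

    #include <algorithm>
    int width_in_bytes(bool sve, int max_vector_size, int elem_bytes) {
      int size = std::min(sve ? 256 : 16, max_vector_size);
      if (size < 2 * elem_bytes) size = 0;  // need at least two elements
      if (size < 4) size = 0;               // and never fewer than 4 bytes
      return size;
    }
    // width_in_bytes(false, 64, 4) == 16   (NEON: 128-bit registers)
    // width_in_bytes(true,  64, 4) == 64   (SVE: here a 512-bit vector unit)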
 2451 
 2452 // Limits on vector size (number of elements) loaded into vector.
 2453 const int Matcher::max_vector_size(const BasicType bt) {
 2454   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2455 }
 2456 const int Matcher::min_vector_size(const BasicType bt) {
 2457   int max_size = max_vector_size(bt);
 2458   if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
 2459     // Currently, vector lengths less than the SVE vector register size are not supported.
 2460     return max_size;
 2461   } else {
 2462     //  For the moment limit the vector size to 8 bytes with NEON.
 2463     int size = 8 / type2aelembytes(bt);
 2464     if (size < 2) size = 2;

 3731     address call;
 3732     if (!_method) {
 3733       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3734       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
 3735     } else {
 3736       int method_index = resolved_method_index(cbuf);
 3737       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3738                                                   : static_call_Relocation::spec(method_index);
 3739       call = __ trampoline_call(Address(addr, rspec), &cbuf);
 3740 
 3741       // Emit stub for static call
 3742       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3743       if (stub == NULL) {
 3744         ciEnv::current()->record_failure("CodeCache is full");
 3745         return;
 3746       }
 3747     }
 3748     if (call == NULL) {
 3749       ciEnv::current()->record_failure("CodeCache is full");
 3750       return;
 3751     } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
 3752       // Only non uncommon_trap calls need to reinitialize ptrue.
 3753       if (uncommon_trap_request() == 0) {
 3754         __ reinitialize_ptrue();
 3755       }
 3756     }
 3757   %}
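
(The uncommon_trap exemption above is presumably safe because an uncommon trap
deoptimizes: control resumes in the interpreter rather than returning to this
compiled frame, so the all-true p7 invariant is never consumed on that path.)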
 3758 
 3759   enc_class aarch64_enc_java_dynamic_call(method meth) %{
 3760     C2_MacroAssembler _masm(&cbuf);
 3761     int method_index = resolved_method_index(cbuf);
 3762     address call = __ ic_call((address)$meth$$method, method_index);
 3763     if (call == NULL) {
 3764       ciEnv::current()->record_failure("CodeCache is full");
 3765       return;
 3766     } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
 3767       __ reinitialize_ptrue();
 3768     }
 3769   %}
 3770 
 3771   enc_class aarch64_enc_call_epilog() %{
 3772     C2_MacroAssembler _masm(&cbuf);
 3773     if (VerifyStackAtCalls) {
 3774       // Check that stack depth is unchanged: find majik cookie on stack
 3775       __ call_Unimplemented();
 3776     }
 3777   %}
 3778 
 3779   enc_class aarch64_enc_java_to_runtime(method meth) %{
 3780     C2_MacroAssembler _masm(&cbuf);
 3781 
 3782     // some calls to generated routines (arraycopy code) are scheduled
 3783     // by C2 as runtime calls. if so we can call them using a br (they
 3784     // will be in a reachable segment) otherwise we have to use a blr
 3785     // which loads the absolute address into a register.
 3786     address entry = (address)$meth$$method;
 3787     CodeBlob *cb = CodeCache::find_blob(entry);
 3788     if (cb) {
 3789       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
 3790       if (call == NULL) {
 3791         ciEnv::current()->record_failure("CodeCache is full");
 3792         return;
 3793       }
 3794     } else {
 3795       Label retaddr;
 3796       __ adr(rscratch2, retaddr);
 3797       __ lea(rscratch1, RuntimeAddress(entry));
 3798       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
 3799       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
 3800       __ blr(rscratch1);
 3801       __ bind(retaddr);
 3802       __ add(sp, sp, 2 * wordSize);
 3803     }
 3804     if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
 3805       __ reinitialize_ptrue();
 3806     }
 3807   %}
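
(In the blr path above, the stp plants the computed return address in a known
stack slot; presumably that is the "breadcrumb" which
JavaFrameAnchor::capture_last_Java_pc() reads back when a stack walk needs the
last Java pc.)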
 3808 
 3809   enc_class aarch64_enc_rethrow() %{
 3810     C2_MacroAssembler _masm(&cbuf);
 3811     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
 3812   %}
 3813 
 3814   enc_class aarch64_enc_ret() %{
 3815     C2_MacroAssembler _masm(&cbuf);
 3816 #ifdef ASSERT
 3817     if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
 3818       __ verify_ptrue();
 3819     }
 3820 #endif
 3821     __ ret(lr);
 3822   %}
 3823 
 3824   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
 3825     C2_MacroAssembler _masm(&cbuf);
 3826     Register target_reg = as_Register($jump_target$$reg);
 3827     __ br(target_reg);
 3828   %}
 3829 
 3830   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
 3831     C2_MacroAssembler _masm(&cbuf);
 3832     Register target_reg = as_Register($jump_target$$reg);
 3833     // exception oop should be in r0
 3834     // ret addr has been popped into lr
 3835     // callee expects it in r3
 3836     __ mov(r3, lr);
 3837     __ br(target_reg);
 3838   %}
 3839 
 3840   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{

 4564 operand immLoffset8()
 4565 %{
 4566   predicate(Address::offset_ok_for_immed(n->get_long(), 3));
 4567   match(ConL);
 4568 
 4569   op_cost(0);
 4570   format %{ %}
 4571   interface(CONST_INTER);
 4572 %}
 4573 
 4574 operand immLoffset16()
 4575 %{
 4576   predicate(Address::offset_ok_for_immed(n->get_long(), 4));
 4577   match(ConL);
 4578 
 4579   op_cost(0);
 4580   format %{ %}
 4581   interface(CONST_INTER);
 4582 %}
 4583 
 4584 // 8 bit signed value.
 4585 operand immI8()
 4586 %{
 4587   predicate(n->get_int() <= 127 && n->get_int() >= -128);
 4588   match(ConI);
 4589 
 4590   op_cost(0);
 4591   format %{ %}
 4592   interface(CONST_INTER);
 4593 %}
 4594 
 4595 // 8 bit signed value (simm8), or #simm8 LSL 8.
 4596 operand immI8_shift8()
 4597 %{
 4598   predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
 4599             (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
 4600   match(ConI);
 4601 
 4602   op_cost(0);
 4603   format %{ %}
 4604   interface(CONST_INTER);
 4605 %}
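
The second clause of the predicate above accepts any signed byte value shifted
left by eight bits; a standalone check of the boundary arithmetic (sketch):

    #include <cassert>
    bool imm8_or_shift8(int v) {
      return (v <= 127 && v >= -128) ||
             (v <= 32512 && v >= -32768 && (v & 0xff) == 0);
    }
    int main() {
      assert(imm8_or_shift8(0x1200));    // 0x12 << 8: encodable
      assert(!imm8_or_shift8(0x1201));   // low byte nonzero: not encodable
      assert(imm8_or_shift8(32512));     // 127 << 8: the upper bound
      assert(!imm8_or_shift8(32768));    // out of range
      return 0;
    }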
 4606 
 4607 // 8 bit signed value (simm8), or #simm8 LSL 8.
 4608 operand immL8_shift8()
 4609 %{
 4610   predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
 4611             (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
 4612   match(ConL);
 4613 
 4614   op_cost(0);
 4615   format %{ %}
 4616   interface(CONST_INTER);
 4617 %}
 4618 
 4619 // 32 bit integer valid for add sub immediate
 4620 operand immIAddSub()
 4621 %{
 4622   predicate(Assembler::operand_valid_for_add_sub_immediate((int64_t)n->get_int()));
 4623   match(ConI);
 4624   op_cost(0);
 4625   format %{ %}
 4626   interface(CONST_INTER);
 4627 %}
 4628 
 4629 // 32 bit unsigned integer valid for logical immediate
 4630 // TODO -- check this is right when e.g the mask is 0x80000000
 4631 operand immILog()
 4632 %{
 4633   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (uint64_t)n->get_int()));
 4634   match(ConI);
 4635 
 4636   op_cost(0);
 4637   format %{ %}
 4638   interface(CONST_INTER);

16467   ins_cost(4 * INSN_COST);
16468   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
16469   ins_encode( aarch64_enc_ldrvS(dst, mem) );
16470   ins_pipe(vload_reg_mem64);
16471 %}
16472 
16473 // Load vector (64 bits)
16474 instruct loadV8(vecD dst, vmem8 mem)
16475 %{
16476   predicate(n->as_LoadVector()->memory_size() == 8);
16477   match(Set dst (LoadVector mem));
16478   ins_cost(4 * INSN_COST);
16479   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
16480   ins_encode( aarch64_enc_ldrvD(dst, mem) );
16481   ins_pipe(vload_reg_mem64);
16482 %}
16483 
16484 // Load Vector (128 bits)
16485 instruct loadV16(vecX dst, vmem16 mem)
16486 %{
16487   predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
16488   match(Set dst (LoadVector mem));
16489   ins_cost(4 * INSN_COST);
16490   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
16491   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16492   ins_pipe(vload_reg_mem128);
16493 %}
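
Note the new UseSVE == 0 guard: it keeps this fixed-width 128-bit NEON rule
(and the similarly guarded rules below) from matching when SVE is enabled, so
vectors of SVE register width fall through to the scalable-vector rules this
change introduces elsewhere.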
16494 
16495 // Store Vector (32 bits)
16496 instruct storeV4(vecD src, vmem4 mem)
16497 %{
16498   predicate(n->as_StoreVector()->memory_size() == 4);
16499   match(Set mem (StoreVector mem src));
16500   ins_cost(4 * INSN_COST);
16501   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
16502   ins_encode( aarch64_enc_strvS(src, mem) );
16503   ins_pipe(vstore_reg_mem64);
16504 %}
16505 
16506 // Store Vector (64 bits)
16507 instruct storeV8(vecD src, vmem8 mem)

16523   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
16524   ins_encode( aarch64_enc_strvQ(src, mem) );
16525   ins_pipe(vstore_reg_mem128);
16526 %}
16527 
16528 instruct replicate8B(vecD dst, iRegIorL2I src)
16529 %{
16530   predicate(n->as_Vector()->length() == 4 ||
16531             n->as_Vector()->length() == 8);
16532   match(Set dst (ReplicateB src));
16533   ins_cost(INSN_COST);
16534   format %{ "dup  $dst, $src\t# vector (8B)" %}
16535   ins_encode %{
16536     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
16537   %}
16538   ins_pipe(vdup_reg_reg64);
16539 %}
16540 
16541 instruct replicate16B(vecX dst, iRegIorL2I src)
16542 %{
16543   predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
16544   match(Set dst (ReplicateB src));
16545   ins_cost(INSN_COST);
16546   format %{ "dup  $dst, $src\t# vector (16B)" %}
16547   ins_encode %{
16548     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
16549   %}
16550   ins_pipe(vdup_reg_reg128);
16551 %}
16552 
16553 instruct replicate8B_imm(vecD dst, immI con)
16554 %{
16555   predicate(n->as_Vector()->length() == 4 ||
16556             n->as_Vector()->length() == 8);
16557   match(Set dst (ReplicateB con));
16558   ins_cost(INSN_COST);
16559   format %{ "movi  $dst, $con\t# vector(8B)" %}
16560   ins_encode %{
16561     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
16562   %}
16563   ins_pipe(vmovi_reg_imm64);
16564 %}
16565 
16566 instruct replicate16B_imm(vecX dst, immI con)
16567 %{
16568   predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
16569   match(Set dst (ReplicateB con));
16570   ins_cost(INSN_COST);
16571   format %{ "movi  $dst, $con\t# vector(16B)" %}
16572   ins_encode %{
16573     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
16574   %}
16575   ins_pipe(vmovi_reg_imm128);
16576 %}
16577 
16578 instruct replicate4S(vecD dst, iRegIorL2I src)
16579 %{
16580   predicate(n->as_Vector()->length() == 2 ||
16581             n->as_Vector()->length() == 4);
16582   match(Set dst (ReplicateS src));
16583   ins_cost(INSN_COST);
16584   format %{ "dup  $dst, $src\t# vector (4S)" %}
16585   ins_encode %{
16586     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
16587   %}
16588   ins_pipe(vdup_reg_reg64);
16589 %}
16590 
16591 instruct replicate8S(vecX dst, iRegIorL2I src)
16592 %{
16593   predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
16594   match(Set dst (ReplicateS src));
16595   ins_cost(INSN_COST);
16596   format %{ "dup  $dst, $src\t# vector (8S)" %}
16597   ins_encode %{
16598     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
16599   %}
16600   ins_pipe(vdup_reg_reg128);
16601 %}
16602 
16603 instruct replicate4S_imm(vecD dst, immI con)
16604 %{
16605   predicate(n->as_Vector()->length() == 2 ||
16606             n->as_Vector()->length() == 4);
16607   match(Set dst (ReplicateS con));
16608   ins_cost(INSN_COST);
16609   format %{ "movi  $dst, $con\t# vector(4H)" %}
16610   ins_encode %{
16611     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
16612   %}
16613   ins_pipe(vmovi_reg_imm64);
16614 %}
16615 
16616 instruct replicate8S_imm(vecX dst, immI con)
16617 %{
16618   predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
16619   match(Set dst (ReplicateS con));
16620   ins_cost(INSN_COST);
16621   format %{ "movi  $dst, $con\t# vector(8H)" %}
16622   ins_encode %{
16623     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16624   %}
16625   ins_pipe(vmovi_reg_imm128);
16626 %}
16627 
16628 instruct replicate2I(vecD dst, iRegIorL2I src)
16629 %{
16630   predicate(n->as_Vector()->length() == 2);
16631   match(Set dst (ReplicateI src));
16632   ins_cost(INSN_COST);
16633   format %{ "dup  $dst, $src\t# vector (2I)" %}
16634   ins_encode %{
16635     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16636   %}
16637   ins_pipe(vdup_reg_reg64);
16638 %}
16639 
16640 instruct replicate4I(vecX dst, iRegIorL2I src)
16641 %{
16642   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16643   match(Set dst (ReplicateI src));
16644   ins_cost(INSN_COST);
16645   format %{ "dup  $dst, $src\t# vector (4I)" %}
16646   ins_encode %{
16647     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16648   %}
16649   ins_pipe(vdup_reg_reg128);
16650 %}
16651 
16652 instruct replicate2I_imm(vecD dst, immI con)
16653 %{
16654   predicate(n->as_Vector()->length() == 2);
16655   match(Set dst (ReplicateI con));
16656   ins_cost(INSN_COST);
16657   format %{ "movi  $dst, $con\t# vector(2I)" %}
16658   ins_encode %{
16659     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16660   %}
16661   ins_pipe(vmovi_reg_imm64);
16662 %}
16663 
16664 instruct replicate4I_imm(vecX dst, immI con)
16665 %{
16666   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16667   match(Set dst (ReplicateI con));
16668   ins_cost(INSN_COST);
16669   format %{ "movi  $dst, $con\t# vector(4I)" %}
16670   ins_encode %{
16671     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16672   %}
16673   ins_pipe(vmovi_reg_imm128);
16674 %}
16675 
16676 instruct replicate2L(vecX dst, iRegL src)
16677 %{
16678   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16679   match(Set dst (ReplicateL src));
16680   ins_cost(INSN_COST);
16681   format %{ "dup  $dst, $src\t# vector (2L)" %}
16682   ins_encode %{
16683     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16684   %}
16685   ins_pipe(vdup_reg_reg128);
16686 %}
16687 
16688 instruct replicate2L_zero(vecX dst, immI0 zero)
16689 %{
16690   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16691   match(Set dst (ReplicateI zero));
16692   ins_cost(INSN_COST);
16693   format %{ "movi  $dst, $zero\t# vector(4I)" %}
16694   ins_encode %{
16695     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16696            as_FloatRegister($dst$$reg),
16697            as_FloatRegister($dst$$reg));
16698   %}
16699   ins_pipe(vmovi_reg_imm128);
16700 %}
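
(replicate2L_zero matches ReplicateI rather than ReplicateL, presumably
because a 128-bit zero has the same bit pattern at any lane size; the
EOR-with-self idiom then zeroes the whole register regardless of its prior
contents.)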
16701 
16702 instruct replicate2F(vecD dst, vRegF src)
16703 %{
16704   predicate(n->as_Vector()->length() == 2);
16705   match(Set dst (ReplicateF src));
16706   ins_cost(INSN_COST);
16707   format %{ "dup  $dst, $src\t# vector (2F)" %}
16708   ins_encode %{
16709     __ dup(as_FloatRegister($dst$$reg), __ T2S,
16710            as_FloatRegister($src$$reg));
16711   %}
16712   ins_pipe(vdup_reg_freg64);
16713 %}
16714 
16715 instruct replicate4F(vecX dst, vRegF src)
16716 %{
16717   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16718   match(Set dst (ReplicateF src));
16719   ins_cost(INSN_COST);
16720   format %{ "dup  $dst, $src\t# vector (4F)" %}
16721   ins_encode %{
16722     __ dup(as_FloatRegister($dst$$reg), __ T4S,
16723            as_FloatRegister($src$$reg));
16724   %}
16725   ins_pipe(vdup_reg_freg128);
16726 %}
16727 
16728 instruct replicate2D(vecX dst, vRegD src)
16729 %{
16730   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16731   match(Set dst (ReplicateD src));
16732   ins_cost(INSN_COST);
16733   format %{ "dup  $dst, $src\t# vector (2D)" %}
16734   ins_encode %{
16735     __ dup(as_FloatRegister($dst$$reg), __ T2D,
16736            as_FloatRegister($src$$reg));
16737   %}
16738   ins_pipe(vdup_reg_dreg128);
16739 %}
16740 
16741 // ====================REDUCTION ARITHMETIC====================================
16742 
16743 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
16744 %{
16745   match(Set dst (AddReductionVI isrc vsrc));
16746   ins_cost(INSN_COST);
16747   effect(TEMP tmp, TEMP tmp2);
16748   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
16749             "umov  $tmp2, $vsrc, S, 1\n\t"
16750             "addw  $tmp, $isrc, $tmp\n\t"