1153
// Class for 128 bit register v30 (pair of 64-bit halves: V30, V30_H)
reg_class v30_reg(
    V30, V30_H
);

// Class for 128 bit register v31 (pair of 64-bit halves: V31, V31_H)
reg_class v31_reg(
    V31, V31_H
);
1163
// Class for all SVE predicate registers.
// P7 is excluded from allocation: it is reserved to hold an all-true
// predicate (every element preset to TRUE) so predicated SVE code can use
// it without regenerating it; the register allocator must never hand it out.
reg_class pr_reg (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
    P8,
    P9,
    P10,
    P11,
    P12,
    P13,
    P14,
    P15
);
1183
// Class for SVE governing predicate registers, which are used
// to determine the active elements of a predicated instruction.
// P7 is excluded from allocation: it is reserved to hold an all-true
// predicate, so it must not be clobbered by the register allocator.
reg_class gov_pr (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
);
1196
// Singleton class for condition codes (the single RFLAGS register)
reg_class int_flags(RFLAGS);
1199
1200 %}
1201
1202 //----------DEFINITION BLOCK---------------------------------------------------
1203 // Define name --> value mappings to inform the ADLC of an integer valued name
1204 // Current support includes integer values in the range [0, 0x7FFFFFFF]
1205 // Format:
1206 // int_def <name> ( <int_value>, <expression>);
1207 // Generated Code in ad_<arch>.hpp
1208 // #define <name> (<expression>)
1209 // // value == <int_value>
1210 // Generated code in ad_<arch>.cpp adlc_verification()
1211 // assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
1212 //
1213
1214 // we follow the ppc-aix port in using a simple cost model which ranks
1881
1882 // n.b. frame size includes space for return pc and rfp
1883 const int framesize = C->output()->frame_size_in_bytes();
1884 assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
1885
1886 // insert a nop at the start of the prolog so we can patch in a
1887 // branch if we need to invalidate the method later
1888 __ nop();
1889
1890 if (C->clinit_barrier_on_entry()) {
1891 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1892
1893 Label L_skip_barrier;
1894
1895 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1896 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1897 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1898 __ bind(L_skip_barrier);
1899 }
1900
1901 int bangsize = C->output()->bang_size_in_bytes();
1902 if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
1903 __ generate_stack_overflow_check(bangsize);
1904
1905 __ build_frame(framesize);
1906
1907 if (C->stub_function() == NULL) {
1908 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1909 bs->nmethod_entry_barrier(&_masm);
1910 }
1911
1912 if (VerifyStackAtCalls) {
1913 Unimplemented();
1914 }
1915
1916 C->output()->set_frame_complete(cbuf.insts_size());
1917
1918 if (C->has_mach_constant_base_node()) {
1919 // NOTE: We set the table base offset here because users might be
1920 // emitted before MachConstantBaseNode.
2045 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2046
2047 if (src_hi != OptoReg::Bad) {
2048 assert((src_lo&1)==0 && src_lo+1==src_hi &&
2049 (dst_lo&1)==0 && dst_lo+1==dst_hi,
2050 "expected aligned-adjacent pairs");
2051 }
2052
2053 if (src_lo == dst_lo && src_hi == dst_hi) {
2054 return 0; // Self copy, no move.
2055 }
2056
2057 bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
2058 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
2059 int src_offset = ra_->reg2offset(src_lo);
2060 int dst_offset = ra_->reg2offset(dst_lo);
2061
2062 if (bottom_type()->isa_vect() != NULL) {
2063 uint ireg = ideal_reg();
2064 if (ireg == Op_VecA && cbuf) {
2065 Unimplemented();
2066 } else if (cbuf) {
2067 assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2068 C2_MacroAssembler _masm(cbuf);
2069 assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2070 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2071 // stack->stack
2072 assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
2073 if (ireg == Op_VecD) {
2074 __ unspill(rscratch1, true, src_offset);
2075 __ spill(rscratch1, true, dst_offset);
2076 } else {
2077 __ spill_copy128(src_offset, dst_offset);
2078 }
2079 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2080 __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2081 ireg == Op_VecD ? __ T8B : __ T16B,
2082 as_FloatRegister(Matcher::_regEncode[src_lo]));
2083 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2084 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2085 ireg == Op_VecD ? __ D : __ Q,
2338 if (!has_match_rule(opcode))
2339 return false;
2340
2341 bool ret_value = true;
2342 switch (opcode) {
2343 case Op_CacheWB:
2344 case Op_CacheWBPreSync:
2345 case Op_CacheWBPostSync:
2346 if (!VM_Version::supports_data_cache_line_flush()) {
2347 ret_value = false;
2348 }
2349 break;
2350 }
2351
2352 return ret_value; // Per default match rules are supported.
2353 }
2354
2355 // Identify extra cases that we might want to provide match rules for vector nodes and
2356 // other intrinsics guarded with vector length (vlen) and element type (bt).
2357 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2358 if (!match_rule_supported(opcode)) {
2359 return false;
2360 }
2361
2362 // Special cases which require vector length
2363 switch (opcode) {
2364 case Op_MulAddVS2VI: {
2365 if (vlen != 4) {
2366 return false;
2367 }
2368 break;
2369 }
2370 }
2371
2372 return true; // Per default match rules are supported.
2373 }
2374
// Predicated (masked) vector execution is available only with SVE;
// plain NEON has no predicate registers.
const bool Matcher::has_predicated_vectors(void) {
  return UseSVE > 0;
}
2378
// No platform-specific tuning of the float register pressure threshold;
// the generic default is used unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
2382
// Unused on this platform; must never be reached (guarded by Unimplemented()).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2388
2389 // Is this branch offset short enough that a short branch can be used?
2390 //
2391 // NOTE: If the platform does not provide any short branch variants, then
2392 // this method should return false for offset 0.
2393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2394 // The passed offset is relative to address of the branch.
2395
2396 return (-32768 <= offset && offset < 32768);
2397 }
2398
// Can a 64-bit constant be emitted cheaply as a single long store?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}
2404
// true just means we have fast l2f conversion
// (reports that long-to-float conversion is cheap on this platform).
const bool Matcher::convL2FSupported(void) {
  return true;
}
2409
2410 // Vector width in bytes.
2411 const int Matcher::vector_width_in_bytes(BasicType bt) {
2412 int size = MIN2(16, (int)MaxVectorSize);
2413 // Minimum 2 values in vector
2414 if (size < 2*type2aelembytes(bt)) size = 0;
2415 // But never < 4
2416 if (size < 4) size = 0;
2417 return size;
2418 }
2419
// Limits on vector size (number of elements) loaded into vector.
// Element count = total vector bytes / bytes per element.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2424 const int Matcher::min_vector_size(const BasicType bt) {
2425 int max_size = max_vector_size(bt);
2426 if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
2427 // Currently vector length less than SVE vector register size is not supported.
2428 return max_size;
2429 } else {
2430 // For the moment limit the vector size to 8 bytes with NEON.
2431 int size = 8 / type2aelembytes(bt);
2432 if (size < 2) size = 2;
3699 address call;
3700 if (!_method) {
3701 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3702 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3703 } else {
3704 int method_index = resolved_method_index(cbuf);
3705 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3706 : static_call_Relocation::spec(method_index);
3707 call = __ trampoline_call(Address(addr, rspec), &cbuf);
3708
3709 // Emit stub for static call
3710 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
3711 if (stub == NULL) {
3712 ciEnv::current()->record_failure("CodeCache is full");
3713 return;
3714 }
3715 }
3716 if (call == NULL) {
3717 ciEnv::current()->record_failure("CodeCache is full");
3718 return;
3719 }
3720 %}
3721
enc_class aarch64_enc_java_dynamic_call(method meth) %{
  C2_MacroAssembler _masm(&cbuf);
  int method_index = resolved_method_index(cbuf);
  // Inline-cache call through the IC stub for this method index.
  address call = __ ic_call((address)$meth$$method, method_index);
  if (call == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return;
  } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
    // The callee may have clobbered P7, the reserved all-true SVE
    // predicate; restore it before resuming compiled vector code.
    __ reinitialize_ptrue();
  }
%}
3731
enc_class aarch64_enc_call_epilog() %{
  C2_MacroAssembler _masm(&cbuf);
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find majik cookie on stack
    // (stack-depth verification is not implemented on this platform yet).
    __ call_Unimplemented();
  }
%}
3739
enc_class aarch64_enc_java_to_runtime(method meth) %{
  C2_MacroAssembler _masm(&cbuf);

  // some calls to generated routines (arraycopy code) are scheduled
  // by C2 as runtime calls. if so we can call them using a br (they
  // will be in a reachable segment) otherwise we have to use a blr
  // which loads the absolute address into a register.
  address entry = (address)$meth$$method;
  CodeBlob *cb = CodeCache::find_blob(entry);
  if (cb) {
    // Known code blob: reachable, call through a trampoline.
    address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  } else {
    Label retaddr;
    __ adr(rscratch2, retaddr);
    __ lea(rscratch1, RuntimeAddress(entry));
    // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
    __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
    __ blr(rscratch1);
    __ bind(retaddr);
    __ add(sp, sp, 2 * wordSize);
  }
  if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
    // The runtime routine may have clobbered P7, the reserved all-true
    // SVE predicate; restore it before resuming compiled vector code.
    __ reinitialize_ptrue();
  }
%}
3766
enc_class aarch64_enc_rethrow() %{
  C2_MacroAssembler _masm(&cbuf);
  // Tail-jump to the shared rethrow stub.
  __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
%}
3771
enc_class aarch64_enc_ret() %{
  C2_MacroAssembler _masm(&cbuf);
#ifdef ASSERT
  // Debug-only invariant check: P7, the reserved all-true SVE predicate,
  // must still hold its preset value whenever compiled code returns.
  if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
    __ verify_ptrue();
  }
#endif
  __ ret(lr);
%}
3776
enc_class aarch64_enc_tail_call(iRegP jump_target) %{
  C2_MacroAssembler _masm(&cbuf);
  Register target_reg = as_Register($jump_target$$reg);
  // Tail call: plain register branch, no new frame and no return here.
  __ br(target_reg);
%}
3782
enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
  C2_MacroAssembler _masm(&cbuf);
  Register target_reg = as_Register($jump_target$$reg);
  // Tail jump used for exception forwarding:
  // exception oop should be in r0
  // ret addr has been popped into lr
  // callee expects it in r3
  __ mov(r3, lr);
  __ br(target_reg);
%}
3792
3793 enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
// 64 bit integer valid as a memory offset for an 8-byte-scaled access
// (offset_ok_for_immed with shift == 3).
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid as a memory offset for a 16-byte-scaled access
// (offset_ok_for_immed with shift == 4).
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((int64_t)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4546
4547 // 32 bit unsigned integer valid for logical immediate
4548 // TODO -- check this is right when e.g the mask is 0x80000000
4549 operand immILog()
4550 %{
4551 predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (uint64_t)n->get_int()));
4552 match(ConI);
4553
4554 op_cost(0);
4555 format %{ %}
4556 interface(CONST_INTER);
16385 ins_cost(4 * INSN_COST);
16386 format %{ "ldrs $dst,$mem\t# vector (32 bits)" %}
16387 ins_encode( aarch64_enc_ldrvS(dst, mem) );
16388 ins_pipe(vload_reg_mem64);
16389 %}
16390
// Load vector (64 bits) into a vecD register
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits) into a vecX register
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits) — low half of a vecD register
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
16423
16424 // Store Vector (64 bits)
16425 instruct storeV8(vecD src, vmem8 mem)
16441 format %{ "strq $mem,$src\t# vector (128 bits)" %}
16442 ins_encode( aarch64_enc_strvQ(src, mem) );
16443 ins_pipe(vstore_reg_mem128);
16444 %}
16445
// ====================REPLICATE===============================================
// Broadcast a scalar (register or immediate) into every lane of a vector.

// Byte broadcast into a 64-bit vector; also matches 4B vectors, since
// both 4B and 8B fit in a vecD register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Byte broadcast into a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Immediate byte broadcast; the constant is masked to its low 8 bits.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Short broadcast; also matches 2S vectors (both fit in a vecD register).
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Immediate short broadcast; the constant is masked to its low 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Int broadcast into a 64-bit vector (two lanes).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Long broadcast; two 64-bit lanes always require a 128-bit vecX.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero vector: a zero-filled 128-bit register is the same bit pattern
// whatever the lane size, so it is produced by EOR-ing dst with itself.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Float broadcast from a floating-point source register.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Double broadcast; two 64-bit lanes require a 128-bit vecX.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16658
16659 // ====================REDUCTION ARITHMETIC====================================
16660
16661 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
16662 %{
16663 match(Set dst (AddReductionVI isrc vsrc));
16664 ins_cost(INSN_COST);
16665 effect(TEMP tmp, TEMP tmp2);
16666 format %{ "umov $tmp, $vsrc, S, 0\n\t"
16667 "umov $tmp2, $vsrc, S, 1\n\t"
16668 "addw $tmp, $isrc, $tmp\n\t"
|
1153
// Class for 128 bit register v30 (pair of 64-bit halves: V30, V30_H)
reg_class v30_reg(
    V30, V30_H
);

// Class for 128 bit register v31 (pair of 64-bit halves: V31, V31_H)
reg_class v31_reg(
    V31, V31_H
);
1163
// Class for all SVE predicate registers.
// (P7 is deliberately excluded: it is reserved to hold the all-true
// predicate and must never be handed out by the register allocator.)
reg_class pr_reg (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
    P8,
    P9,
    P10,
    P11,
    P12,
    P13,
    P14,
    P15
);
1183
// Class for SVE governing predicate registers, which are used
// to determine the active elements of a predicated instruction.
// (P7 is deliberately excluded: it is reserved to hold the all-true
// predicate and must never be clobbered by allocation.)
reg_class gov_pr (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
);
1196
// Singleton class for condition codes (the single RFLAGS register)
reg_class int_flags(RFLAGS);
1199
1200 %}
1201
1202 //----------DEFINITION BLOCK---------------------------------------------------
1203 // Define name --> value mappings to inform the ADLC of an integer valued name
1204 // Current support includes integer values in the range [0, 0x7FFFFFFF]
1205 // Format:
1206 // int_def <name> ( <int_value>, <expression>);
1207 // Generated Code in ad_<arch>.hpp
1208 // #define <name> (<expression>)
1209 // // value == <int_value>
1210 // Generated code in ad_<arch>.cpp adlc_verification()
1211 // assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
1212 //
1213
1214 // we follow the ppc-aix port in using a simple cost model which ranks
1881
1882 // n.b. frame size includes space for return pc and rfp
1883 const int framesize = C->output()->frame_size_in_bytes();
1884 assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
1885
1886 // insert a nop at the start of the prolog so we can patch in a
1887 // branch if we need to invalidate the method later
1888 __ nop();
1889
1890 if (C->clinit_barrier_on_entry()) {
1891 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1892
1893 Label L_skip_barrier;
1894
1895 __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
1896 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1897 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1898 __ bind(L_skip_barrier);
1899 }
1900
1901 if (UseSVE > 0 && C->max_vector_size() >= 16) {
1902 __ reinitialize_ptrue();
1903 }
1904
1905 int bangsize = C->output()->bang_size_in_bytes();
1906 if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
1907 __ generate_stack_overflow_check(bangsize);
1908
1909 __ build_frame(framesize);
1910
1911 if (C->stub_function() == NULL) {
1912 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1913 bs->nmethod_entry_barrier(&_masm);
1914 }
1915
1916 if (VerifyStackAtCalls) {
1917 Unimplemented();
1918 }
1919
1920 C->output()->set_frame_complete(cbuf.insts_size());
1921
1922 if (C->has_mach_constant_base_node()) {
1923 // NOTE: We set the table base offset here because users might be
1924 // emitted before MachConstantBaseNode.
2049 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2050
2051 if (src_hi != OptoReg::Bad) {
2052 assert((src_lo&1)==0 && src_lo+1==src_hi &&
2053 (dst_lo&1)==0 && dst_lo+1==dst_hi,
2054 "expected aligned-adjacent pairs");
2055 }
2056
2057 if (src_lo == dst_lo && src_hi == dst_hi) {
2058 return 0; // Self copy, no move.
2059 }
2060
2061 bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
2062 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
2063 int src_offset = ra_->reg2offset(src_lo);
2064 int dst_offset = ra_->reg2offset(dst_lo);
2065
2066 if (bottom_type()->isa_vect() != NULL) {
2067 uint ireg = ideal_reg();
2068 if (ireg == Op_VecA && cbuf) {
2069 C2_MacroAssembler _masm(cbuf);
2070 int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
2071 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2072 // stack->stack
2073 __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
2074 sve_vector_reg_size_in_bytes);
2075 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2076 __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
2077 sve_vector_reg_size_in_bytes);
2078 } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2079 __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
2080 sve_vector_reg_size_in_bytes);
2081 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2082 __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2083 as_FloatRegister(Matcher::_regEncode[src_lo]),
2084 as_FloatRegister(Matcher::_regEncode[src_lo]));
2085 } else {
2086 ShouldNotReachHere();
2087 }
2088 } else if (cbuf) {
2089 assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2090 C2_MacroAssembler _masm(cbuf);
2091 assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2092 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2093 // stack->stack
2094 assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
2095 if (ireg == Op_VecD) {
2096 __ unspill(rscratch1, true, src_offset);
2097 __ spill(rscratch1, true, dst_offset);
2098 } else {
2099 __ spill_copy128(src_offset, dst_offset);
2100 }
2101 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2102 __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2103 ireg == Op_VecD ? __ T8B : __ T16B,
2104 as_FloatRegister(Matcher::_regEncode[src_lo]));
2105 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2106 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2107 ireg == Op_VecD ? __ D : __ Q,
2360 if (!has_match_rule(opcode))
2361 return false;
2362
2363 bool ret_value = true;
2364 switch (opcode) {
2365 case Op_CacheWB:
2366 case Op_CacheWBPreSync:
2367 case Op_CacheWBPostSync:
2368 if (!VM_Version::supports_data_cache_line_flush()) {
2369 ret_value = false;
2370 }
2371 break;
2372 }
2373
2374 return ret_value; // Per default match rules are supported.
2375 }
2376
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  // Both the scalar rule and the requested (type, length) combination
  // must be supported before any unit-specific check.
  if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
    return false;
  }
  int bit_size = vlen * type2aelembytes(bt) * 8;
  // Without SVE, vectors are limited to the 128-bit NEON registers.
  if (UseSVE == 0 && bit_size > 128) {
    return false;
  }
  if (UseSVE > 0) {
    // SVE has its own per-opcode support table.
    return op_sve_supported(opcode);
  } else { // NEON
    // Special cases
    switch (opcode) {
    case Op_MulAddVS2VI:
      // Only implemented for full 128-bit vectors.
      if (bit_size < 128) {
        return false;
      }
      break;
    case Op_MulVL:
      // Not supported with NEON.
      return false;
    default:
      break;
    }
  }
  return true; // Per default match rules are supported.
}
2405
// Predicated (masked) vector execution is available only with SVE;
// plain NEON has no predicate registers.
const bool Matcher::has_predicated_vectors(void) {
  return UseSVE > 0;
}
2409
// No platform-specific tuning of the float register pressure threshold;
// the generic default is used unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
2413
// Unused on this platform; must never be reached (guarded by Unimplemented()).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2419
2420 // Is this branch offset short enough that a short branch can be used?
2421 //
2422 // NOTE: If the platform does not provide any short branch variants, then
2423 // this method should return false for offset 0.
2424 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2425 // The passed offset is relative to address of the branch.
2426
2427 return (-32768 <= offset && offset < 32768);
2428 }
2429
// Can a 64-bit constant be emitted cheaply as a single long store?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}
2435
// true just means we have fast l2f conversion
// (reports that long-to-float conversion is cheap on this platform).
const bool Matcher::convL2FSupported(void) {
  return true;
}
2440
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // The MaxVectorSize should have been set by detecting SVE max vector register size.
  // With SVE the cap is 256 bytes (the architectural maximum register
  // width); plain NEON is limited to 16 bytes (128 bits).
  int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}
2451
// Limits on vector size (number of elements) loaded into vector.
// Element count = total vector bytes / bytes per element.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2456 const int Matcher::min_vector_size(const BasicType bt) {
2457 int max_size = max_vector_size(bt);
2458 if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
2459 // Currently vector length less than SVE vector register size is not supported.
2460 return max_size;
2461 } else {
2462 // For the moment limit the vector size to 8 bytes with NEON.
2463 int size = 8 / type2aelembytes(bt);
2464 if (size < 2) size = 2;
3731 address call;
3732 if (!_method) {
3733 // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3734 call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3735 } else {
3736 int method_index = resolved_method_index(cbuf);
3737 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
3738 : static_call_Relocation::spec(method_index);
3739 call = __ trampoline_call(Address(addr, rspec), &cbuf);
3740
3741 // Emit stub for static call
3742 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
3743 if (stub == NULL) {
3744 ciEnv::current()->record_failure("CodeCache is full");
3745 return;
3746 }
3747 }
3748 if (call == NULL) {
3749 ciEnv::current()->record_failure("CodeCache is full");
3750 return;
3751 } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
3752 // Only non uncommon_trap calls need to reinitialize ptrue.
3753 if (uncommon_trap_request() == 0) {
3754 __ reinitialize_ptrue();
3755 }
3756 }
3757 %}
3758
enc_class aarch64_enc_java_dynamic_call(method meth) %{
  C2_MacroAssembler _masm(&cbuf);
  int method_index = resolved_method_index(cbuf);
  // Inline-cache call through the IC stub for this method index.
  address call = __ ic_call((address)$meth$$method, method_index);
  if (call == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return;
  } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
    // The callee may have clobbered P7, the reserved all-true SVE
    // predicate; restore it before resuming compiled vector code.
    __ reinitialize_ptrue();
  }
%}
3770
enc_class aarch64_enc_call_epilog() %{
  C2_MacroAssembler _masm(&cbuf);
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find majik cookie on stack
    // (stack-depth verification is not implemented on this platform yet).
    __ call_Unimplemented();
  }
%}
3778
enc_class aarch64_enc_java_to_runtime(method meth) %{
  C2_MacroAssembler _masm(&cbuf);

  // some calls to generated routines (arraycopy code) are scheduled
  // by C2 as runtime calls. if so we can call them using a br (they
  // will be in a reachable segment) otherwise we have to use a blr
  // which loads the absolute address into a register.
  address entry = (address)$meth$$method;
  CodeBlob *cb = CodeCache::find_blob(entry);
  if (cb) {
    // Known code blob: reachable, call through a trampoline.
    address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  } else {
    Label retaddr;
    __ adr(rscratch2, retaddr);
    __ lea(rscratch1, RuntimeAddress(entry));
    // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
    __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
    __ blr(rscratch1);
    __ bind(retaddr);
    __ add(sp, sp, 2 * wordSize);
  }
  if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
    // The runtime routine may have clobbered P7, the reserved all-true
    // SVE predicate; restore it before resuming compiled vector code.
    __ reinitialize_ptrue();
  }
%}
3808
// Rethrow an exception: tail-jump (far, reach-independent) to the
// shared OptoRuntime rethrow stub.
3809 enc_class aarch64_enc_rethrow() %{
3810 C2_MacroAssembler _masm(&cbuf);
3811 __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
3812 %}
3813
// Method return. In debug builds, SVE-compiled methods first verify
// that the all-true predicate register still holds its expected value
// (it is an invariant maintained across calls by reinitialize_ptrue).
3814 enc_class aarch64_enc_ret() %{
3815 C2_MacroAssembler _masm(&cbuf);
3816 #ifdef ASSERT
3817 if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
3818 __ verify_ptrue();
3819 }
3820 #endif
3821 __ ret(lr);
3822 %}
3823
// Tail call: transfer control to the address in $jump_target without
// linking (plain br, lr is left untouched).
3824 enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3825 C2_MacroAssembler _masm(&cbuf);
3826 Register target_reg = as_Register($jump_target$$reg);
3827 __ br(target_reg);
3828 %}
3829
// Tail jump used for exception dispatch: forwards to $jump_target with
// the throwing pc moved from lr into r3, where the callee expects it.
3830 enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3831 C2_MacroAssembler _masm(&cbuf);
3832 Register target_reg = as_Register($jump_target$$reg);
3833 // exception oop should be in r0
3834 // ret addr has been popped into lr
3835 // callee expects it in r3
3836 __ mov(r3, lr);
3837 __ br(target_reg);
3838 %}
3839
3840 enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
// Long constant usable as an immediate memory offset for an 8-byte
// access (offset_ok_for_immed with size shift 3).
4564 operand immLoffset8()
4565 %{
4566 predicate(Address::offset_ok_for_immed(n->get_long(), 3));
4567 match(ConL);
4568
4569 op_cost(0);
4570 format %{ %}
4571 interface(CONST_INTER);
4572 %}
4573
// Long constant usable as an immediate memory offset for a 16-byte
// access (offset_ok_for_immed with size shift 4).
4574 operand immLoffset16()
4575 %{
4576 predicate(Address::offset_ok_for_immed(n->get_long(), 4));
4577 match(ConL);
4578
4579 op_cost(0);
4580 format %{ %}
4581 interface(CONST_INTER);
4582 %}
4583
4584 // 8 bit signed value: int constant in [-128, 127].
4585 operand immI8()
4586 %{
4587 predicate(n->get_int() <= 127 && n->get_int() >= -128)
4588 match(ConI);
4589
4590 op_cost(0);
4591 format %{ %}
4592 interface(CONST_INTER);
4593 %}
4594
4595 // 8 bit signed int value (simm8), or #simm8 LSL 8 (i.e. a multiple
4595 // of 256 whose high byte is itself a simm8: [-32768, 32512]).
4596 operand immI8_shift8()
4597 %{
4598 predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
4599 (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
4600 match(ConI);
4601
4602 op_cost(0);
4603 format %{ %}
4604 interface(CONST_INTER);
4605 %}
4606
4607 // 8 bit signed long value (simm8), or #simm8 LSL 8 -- long
4607 // counterpart of immI8_shift8 above.
4608 operand immL8_shift8()
4609 %{
4610 predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
4611 (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
4612 match(ConL);
4613
4614 op_cost(0);
4615 format %{ %}
4616 interface(CONST_INTER);
4617 %}
4618
4619 // 32 bit integer valid for add sub immediate (validity is delegated
4619 // to the assembler's encoding check).
4620 operand immIAddSub()
4621 %{
4622 predicate(Assembler::operand_valid_for_add_sub_immediate((int64_t)n->get_int()));
4623 match(ConI);
4624 op_cost(0);
4625 format %{ %}
4626 interface(CONST_INTER);
4627 %}
4628
4629 // 32 bit unsigned integer valid for logical immediate
4630 // TODO -- check this is right when e.g the mask is 0x80000000
4631 operand immILog()
4632 %{
4633 predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (uint64_t)n->get_int()));
4634 match(ConI);
4635
4636 op_cost(0);
4637 format %{ %}
4638 interface(CONST_INTER);
16467 ins_cost(4 * INSN_COST);
16468 format %{ "ldrs $dst,$mem\t# vector (32 bits)" %}
16469 ins_encode( aarch64_enc_ldrvS(dst, mem) );
16470 ins_pipe(vload_reg_mem64);
16471 %}
16472
16473 // Load Vector (64 bits)
16473 // Matches any LoadVector with an 8-byte memory footprint; emits a
16473 // 64-bit FP/SIMD load (ldrd) into a vecD register.
16474 instruct loadV8(vecD dst, vmem8 mem)
16475 %{
16476 predicate(n->as_LoadVector()->memory_size() == 8);
16477 match(Set dst (LoadVector mem));
16478 ins_cost(4 * INSN_COST);
16479 format %{ "ldrd $dst,$mem\t# vector (64 bits)" %}
16480 ins_encode( aarch64_enc_ldrvD(dst, mem) );
16481 ins_pipe(vload_reg_mem64);
16482 %}
16483
16484 // Load Vector (128 bits)
16484 // NEON-only (UseSVE == 0): SVE has its own 128-bit load rules.
16484 // Emits a 128-bit FP/SIMD load (ldrq) into a vecX register.
16485 instruct loadV16(vecX dst, vmem16 mem)
16486 %{
16487 predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
16488 match(Set dst (LoadVector mem));
16489 ins_cost(4 * INSN_COST);
16490 format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
16491 ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16492 ins_pipe(vload_reg_mem128);
16493 %}
16494
16495 // Store Vector (32 bits)
16495 // Matches any StoreVector with a 4-byte memory footprint; emits a
16495 // 32-bit FP/SIMD store (strs) from a vecD register.
16496 instruct storeV4(vecD src, vmem4 mem)
16497 %{
16498 predicate(n->as_StoreVector()->memory_size() == 4);
16499 match(Set mem (StoreVector mem src));
16500 ins_cost(4 * INSN_COST);
16501 format %{ "strs $mem,$src\t# vector (32 bits)" %}
16502 ins_encode( aarch64_enc_strvS(src, mem) );
16503 ins_pipe(vstore_reg_mem64);
16504 %}
16505
16506 // Store Vector (64 bits)
16507 instruct storeV8(vecD src, vmem8 mem)
16523 format %{ "strq $mem,$src\t# vector (128 bits)" %}
16524 ins_encode( aarch64_enc_strvQ(src, mem) );
16525 ins_pipe(vstore_reg_mem128);
16526 %}
16527
16528 // Replicate a GPR byte into every lane of a 64-bit vector. Also
16528 // covers length-4 vectors: the same T8B dup is emitted and the
16528 // upper, unused lanes carry don't-care copies of the value.
16529 instruct replicate8B(vecD dst, iRegIorL2I src)
16529 %{
16530 predicate(n->as_Vector()->length() == 4 ||
16531 n->as_Vector()->length() == 8);
16532 match(Set dst (ReplicateB src));
16533 ins_cost(INSN_COST);
16534 format %{ "dup $dst, $src\t# vector (8B)" %}
16535 ins_encode %{
16536 __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
16537 %}
16538 ins_pipe(vdup_reg_reg64);
16539 %}
16540
16541 // Replicate a GPR byte into all 16 lanes of a 128-bit vector
16541 // (NEON-only; SVE handles 128-bit replication elsewhere).
16541 instruct replicate16B(vecX dst, iRegIorL2I src)
16542 %{
16543 predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
16544 match(Set dst (ReplicateB src));
16545 ins_cost(INSN_COST);
16546 format %{ "dup $dst, $src\t# vector (16B)" %}
16547 ins_encode %{
16548 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
16549 %}
16550 ins_pipe(vdup_reg_reg128);
16551 %}
16552
16553 // Replicate an immediate byte (low 8 bits of the constant) into a
16553 // 64-bit vector via movi; covers lengths 4 and 8 like replicate8B.
16553 instruct replicate8B_imm(vecD dst, immI con)
16554 %{
16555 predicate(n->as_Vector()->length() == 4 ||
16556 n->as_Vector()->length() == 8);
16557 match(Set dst (ReplicateB con));
16558 ins_cost(INSN_COST);
16559 format %{ "movi $dst, $con\t# vector(8B)" %}
16560 ins_encode %{
16561 __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
16562 %}
16563 ins_pipe(vmovi_reg_imm64);
16564 %}
16565
16566 // Replicate an immediate byte into all 16 lanes of a 128-bit vector
16566 // via movi (NEON-only).
16566 instruct replicate16B_imm(vecX dst, immI con)
16567 %{
16568 predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
16569 match(Set dst (ReplicateB con));
16570 ins_cost(INSN_COST);
16571 format %{ "movi $dst, $con\t# vector(16B)" %}
16572 ins_encode %{
16573 __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
16574 %}
16575 ins_pipe(vmovi_reg_imm128);
16576 %}
16577
16578 // Replicate a GPR halfword (short) into a 64-bit vector (T4H dup);
16578 // covers lengths 2 and 4, upper unused lanes are don't-care.
16578 instruct replicate4S(vecD dst, iRegIorL2I src)
16579 %{
16580 predicate(n->as_Vector()->length() == 2 ||
16581 n->as_Vector()->length() == 4);
16582 match(Set dst (ReplicateS src));
16583 ins_cost(INSN_COST);
16584 format %{ "dup $dst, $src\t# vector (4S)" %}
16585 ins_encode %{
16586 __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
16587 %}
16588 ins_pipe(vdup_reg_reg64);
16589 %}
16590
16591 // Replicate a GPR halfword into all 8 lanes of a 128-bit vector
16591 // (T8H dup, NEON-only).
16591 instruct replicate8S(vecX dst, iRegIorL2I src)
16592 %{
16593 predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
16594 match(Set dst (ReplicateS src));
16595 ins_cost(INSN_COST);
16596 format %{ "dup $dst, $src\t# vector (8S)" %}
16597 ins_encode %{
16598 __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
16599 %}
16600 ins_pipe(vdup_reg_reg128);
16601 %}
16602
16603 // Replicate an immediate halfword (low 16 bits of the constant)
16603 // into a 64-bit vector via movi; covers lengths 2 and 4.
16603 instruct replicate4S_imm(vecD dst, immI con)
16604 %{
16605 predicate(n->as_Vector()->length() == 2 ||
16606 n->as_Vector()->length() == 4);
16607 match(Set dst (ReplicateS con));
16608 ins_cost(INSN_COST);
16609 format %{ "movi $dst, $con\t# vector(4H)" %}
16610 ins_encode %{
16611 __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
16612 %}
16613 ins_pipe(vmovi_reg_imm64);
16614 %}
16615
16616 // Replicate an immediate halfword into all 8 lanes of a 128-bit
16616 // vector via movi (NEON-only).
16616 instruct replicate8S_imm(vecX dst, immI con)
16617 %{
16618 predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
16619 match(Set dst (ReplicateS con));
16620 ins_cost(INSN_COST);
16621 format %{ "movi $dst, $con\t# vector(8H)" %}
16622 ins_encode %{
16623 __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16624 %}
16625 ins_pipe(vmovi_reg_imm128);
16626 %}
16627
16628 // Replicate a GPR word into both lanes of a 64-bit vector (T2S dup).
16628 instruct replicate2I(vecD dst, iRegIorL2I src)
16629 %{
16630 predicate(n->as_Vector()->length() == 2);
16631 match(Set dst (ReplicateI src));
16632 ins_cost(INSN_COST);
16633 format %{ "dup $dst, $src\t# vector (2I)" %}
16634 ins_encode %{
16635 __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16636 %}
16637 ins_pipe(vdup_reg_reg64);
16638 %}
16639
16640 // Replicate a GPR word into all 4 lanes of a 128-bit vector
16640 // (T4S dup, NEON-only).
16640 instruct replicate4I(vecX dst, iRegIorL2I src)
16641 %{
16642 predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16643 match(Set dst (ReplicateI src));
16644 ins_cost(INSN_COST);
16645 format %{ "dup $dst, $src\t# vector (4I)" %}
16646 ins_encode %{
16647 __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16648 %}
16649 ins_pipe(vdup_reg_reg128);
16650 %}
16651
16652 // Replicate an immediate word into both lanes of a 64-bit vector
16652 // via movi.
16652 instruct replicate2I_imm(vecD dst, immI con)
16653 %{
16654 predicate(n->as_Vector()->length() == 2);
16655 match(Set dst (ReplicateI con));
16656 ins_cost(INSN_COST);
16657 format %{ "movi $dst, $con\t# vector(2I)" %}
16658 ins_encode %{
16659 __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16660 %}
16661 ins_pipe(vmovi_reg_imm64);
16662 %}
16663
16664 // Replicate an immediate word into all 4 lanes of a 128-bit vector
16664 // via movi (NEON-only).
16664 instruct replicate4I_imm(vecX dst, immI con)
16665 %{
16666 predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16667 match(Set dst (ReplicateI con));
16668 ins_cost(INSN_COST);
16669 format %{ "movi $dst, $con\t# vector(4I)" %}
16670 ins_encode %{
16671 __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16672 %}
16673 ins_pipe(vmovi_reg_imm128);
16674 %}
16675
16676 // Replicate a GPR long into both lanes of a 128-bit vector
16676 // (T2D dup, NEON-only).
16676 instruct replicate2L(vecX dst, iRegL src)
16677 %{
16678 predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16679 match(Set dst (ReplicateL src));
16680 ins_cost(INSN_COST);
16681 format %{ "dup $dst, $src\t# vector (2L)" %}
16682 ins_encode %{
16683 __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16684 %}
16685 ins_pipe(vdup_reg_reg128);
16686 %}
16687
16688 // All-zero 128-bit vector. NOTE(review): despite the 2L name this
16688 // matches (ReplicateI zero) -- an all-zero register is identical
16688 // for any lane size, so the int-zero ideal node is reused here;
16688 // zeroing is done with the eor dst,dst,dst idiom instead of movi.
16688 instruct replicate2L_zero(vecX dst, immI0 zero)
16689 %{
16690 predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16691 match(Set dst (ReplicateI zero));
16692 ins_cost(INSN_COST);
16693 format %{ "movi $dst, $zero\t# vector(4I)" %}
16694 ins_encode %{
16695 __ eor(as_FloatRegister($dst$$reg), __ T16B,
16696 as_FloatRegister($dst$$reg),
16697 as_FloatRegister($dst$$reg));
16698 %}
16699 ins_pipe(vmovi_reg_imm128);
16700 %}
16701
16702 // Replicate an FP register float into both lanes of a 64-bit
16702 // vector (T2S dup from lane 0 of the source FP register).
16702 instruct replicate2F(vecD dst, vRegF src)
16703 %{
16704 predicate(n->as_Vector()->length() == 2);
16705 match(Set dst (ReplicateF src));
16706 ins_cost(INSN_COST);
16707 format %{ "dup $dst, $src\t# vector (2F)" %}
16708 ins_encode %{
16709 __ dup(as_FloatRegister($dst$$reg), __ T2S,
16710 as_FloatRegister($src$$reg));
16711 %}
16712 ins_pipe(vdup_reg_freg64);
16713 %}
16714
16715 // Replicate an FP register float into all 4 lanes of a 128-bit
16715 // vector (T4S dup, NEON-only).
16715 instruct replicate4F(vecX dst, vRegF src)
16716 %{
16717 predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
16718 match(Set dst (ReplicateF src));
16719 ins_cost(INSN_COST);
16720 format %{ "dup $dst, $src\t# vector (4F)" %}
16721 ins_encode %{
16722 __ dup(as_FloatRegister($dst$$reg), __ T4S,
16723 as_FloatRegister($src$$reg));
16724 %}
16725 ins_pipe(vdup_reg_freg128);
16726 %}
16727
16728 // Replicate an FP register double into both lanes of a 128-bit
16728 // vector (T2D dup, NEON-only).
16728 instruct replicate2D(vecX dst, vRegD src)
16729 %{
16730 predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
16731 match(Set dst (ReplicateD src));
16732 ins_cost(INSN_COST);
16733 format %{ "dup $dst, $src\t# vector (2D)" %}
16734 ins_encode %{
16735 __ dup(as_FloatRegister($dst$$reg), __ T2D,
16736 as_FloatRegister($src$$reg));
16737 %}
16738 ins_pipe(vdup_reg_dreg128);
16739 %}
16740
16741 // ====================REDUCTION ARITHMETIC====================================
16742
16743 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
16744 %{
16745 match(Set dst (AddReductionVI isrc vsrc));
16746 ins_cost(INSN_COST);
16747 effect(TEMP tmp, TEMP tmp2);
16748 format %{ "umov $tmp, $vsrc, S, 0\n\t"
16749 "umov $tmp2, $vsrc, S, 1\n\t"
16750 "addw $tmp, $isrc, $tmp\n\t"
|