
src/cpu/aarch64/vm/aarch64.ad

rev 8573 : 8086087: aarch64: add support for 64 bit vectors
Summary: Support 64 bit vectors
Reviewed-by: duke


 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 




































 868 // Class for all 128bit vector registers
 869 reg_class vectorx_reg(
 870     V0, V0_H, V0_J, V0_K,
 871     V1, V1_H, V1_J, V1_K,
 872     V2, V2_H, V2_J, V2_K,
 873     V3, V3_H, V3_J, V3_K,
 874     V4, V4_H, V4_J, V4_K,
 875     V5, V5_H, V5_J, V5_K,
 876     V6, V6_H, V6_J, V6_K,
 877     V7, V7_H, V7_J, V7_K,
 878     V8, V8_H, V8_J, V8_K,
 879     V9, V9_H, V9_J, V9_K,
 880     V10, V10_H, V10_J, V10_K,
 881     V11, V11_H, V11_J, V11_K,
 882     V12, V12_H, V12_J, V12_K,
 883     V13, V13_H, V13_J, V13_K,
 884     V14, V14_H, V14_J, V14_K,
 885     V15, V15_H, V15_J, V15_K,
 886     V16, V16_H, V16_J, V16_K,
 887     V17, V17_H, V17_J, V17_K,


2116 
2117   enum RC src_hi_rc = rc_class(src_hi);
2118   enum RC src_lo_rc = rc_class(src_lo);
2119   enum RC dst_hi_rc = rc_class(dst_hi);
2120   enum RC dst_lo_rc = rc_class(dst_lo);
2121 
2122   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2123 
2124   if (src_hi != OptoReg::Bad) {
2125     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2126            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2127            "expected aligned-adjacent pairs");
2128   }
2129 
2130   if (src_lo == dst_lo && src_hi == dst_hi) {
2131     return 0;            // Self copy, no move.
2132   }
2133 
2134   if (bottom_type()->isa_vect() != NULL) {
2135     uint len = 4;


2136     if (cbuf) {
2137       MacroAssembler _masm(cbuf);
2138       uint ireg = ideal_reg();
2139       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2140       assert(ireg == Op_VecX, "sanity");
2141       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2142         // stack->stack
2143         int src_offset = ra_->reg2offset(src_lo);
2144         int dst_offset = ra_->reg2offset(dst_lo);
2145         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
2146         len = 8;




2147         if (src_offset < 512) {
2148           __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2149         } else {
2150           __ ldr(rscratch1, Address(sp, src_offset));
2151           __ ldr(rscratch2, Address(sp, src_offset+8));
2152           len += 4;
2153         }
2154         if (dst_offset < 512) {
2155           __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2156         } else {
2157           __ str(rscratch1, Address(sp, dst_offset));
2158           __ str(rscratch2, Address(sp, dst_offset+8));
2159           len += 4;
2160         }

2161       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2162         __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B,

2163                as_FloatRegister(Matcher::_regEncode[src_lo]),
2164                as_FloatRegister(Matcher::_regEncode[src_lo]));
2165       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2166         __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q,

2167                Address(sp, ra_->reg2offset(dst_lo)));
2168       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2169         __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q,

2170                Address(sp, ra_->reg2offset(src_lo)));
2171       } else {
2172         ShouldNotReachHere();
2173       }
2174     } else if (st) {
2175       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2176         // stack->stack
2177         int src_offset = ra_->reg2offset(src_lo);
2178         int dst_offset = ra_->reg2offset(dst_lo);




2179         if (src_offset < 512) {
2180           st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
2181         } else {
2182           st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2183           st->print("\nldr  rscratch2, [sp, #%d]", src_offset+8);
2184         }
2185         if (dst_offset < 512) {
2186           st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
2187         } else {
2188           st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2189           st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+8);
2190         }

2191         st->print("\t# vector spill, stack to stack");
2192       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2193         st->print("mov  %s, %s\t# vector spill, reg to reg",
2194                    Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2195       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2196         st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
2197                    Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2198       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2199         st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
2200                    Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2201       }
2202     }
2203     return len;
2204   }
2205 
2206   switch (src_lo_rc) {
2207   case rc_int:
2208     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2209       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2210           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {


2621 // true just means we have fast l2f conversion
2622 const bool Matcher::convL2FSupported(void) {
2623   return true;
2624 }
2625 
2626 // Vector width in bytes.
2627 const int Matcher::vector_width_in_bytes(BasicType bt) {
2628   int size = MIN2(16,(int)MaxVectorSize);
2629   // Minimum 2 values in vector
2630   if (size < 2*type2aelembytes(bt)) size = 0;
2631   // But never < 4
2632   if (size < 4) size = 0;
2633   return size;
2634 }
2635 
2636 // Limits on vector size (number of elements) loaded into vector.
2637 const int Matcher::max_vector_size(const BasicType bt) {
2638   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2639 }
2640 const int Matcher::min_vector_size(const BasicType bt) {
2641   //return (type2aelembytes(bt) == 1) ? 4 : 2;
2642   // For the moment, only support 1 vector size, 128 bits
2643   return max_vector_size(bt);
2644 }
2645 
2646 // Vector ideal reg.
2647 const int Matcher::vector_ideal_reg(int len) {
2648   return Op_VecX;






2649 }
2650 
2651 // Only lowest bits of xmm reg are used for vector shift count.
2652 const int Matcher::vector_shift_count_ideal_reg(int size) {
2653   return Op_VecX;
2654 }
2655 
2656 // AES support not yet implemented
2657 const bool Matcher::pass_original_key_for_aes() {
2658   return false;
2659 }
2660 
2661 // x86 supports misaligned vectors store/load.
2662 const bool Matcher::misaligned_vectors_ok() {
2663   // TODO fixme
2664   // return !AlignVector; // can be changed by flag
2665   return false;
2666 }
2667 
2668 // false => size gets scaled to BytesPerLong, ok.
2669 const bool Matcher::init_array_count_is_in_bytes = false;
2670 
2671 // Threshold size for cleararray.
2672 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2673 
2674 // Use conditional move (CMOVL)
2675 const int Matcher::long_cmove_cost() {
2676   // long cmoves are no more expensive than int cmoves
2677   return 0;
2678 }
2679 
2680 const int Matcher::float_cmove_cost() {
2681   // float cmoves are no more expensive than int cmoves
2682   return 0;
2683 }
2684 
2685 // Does the CPU require late expand (see block.cpp for description of late expand)?


3056   %}
3057 
3058   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3059     Register dst_reg = as_Register($dst$$reg);
3060     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3061                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3062   %}
3063 
3064   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3065     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3066     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3067                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3068   %}
3069 
3070   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3071     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3072     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3073                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3074   %}
3075 
3076   enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{
3077     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3078     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3079        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3080   %}
3081 
3082   enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{
3083     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3084     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3085        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3086   %}
3087 
3088   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3089     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3090     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3091        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3092   %}
3093 
3094   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3095     Register src_reg = as_Register($src$$reg);
3096     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3097                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3098   %}
3099 
3100   enc_class aarch64_enc_strb0(memory mem) %{
3101     MacroAssembler _masm(&cbuf);
3102     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),


3142   %}
3143 
3144   enc_class aarch64_enc_str0(memory mem) %{
3145     MacroAssembler _masm(&cbuf);
3146     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
3147                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3148   %}
3149 
3150   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
3151     FloatRegister src_reg = as_FloatRegister($src$$reg);
3152     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
3153                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3154   %}
3155 
3156   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
3157     FloatRegister src_reg = as_FloatRegister($src$$reg);
3158     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
3159                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3160   %}
3161 
3162   enc_class aarch64_enc_strvS(vecX src, memory mem) %{
3163     FloatRegister src_reg = as_FloatRegister($src$$reg);
3164     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
3165        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3166   %}
3167 
3168   enc_class aarch64_enc_strvD(vecX src, memory mem) %{
3169     FloatRegister src_reg = as_FloatRegister($src$$reg);
3170     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
3171        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3172   %}
3173 
3174   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
3175     FloatRegister src_reg = as_FloatRegister($src$$reg);
3176     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
3177        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3178   %}
3179 
3180   // END Non-volatile memory access
3181 
3182   // volatile loads and stores
3183 
3184   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
3185     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3186                  rscratch1, stlrb);
3187   %}
3188 


5170   constraint(ALLOC_IN_RC(float_reg));
5171   match(RegF);
5172 
5173   op_cost(0);
5174   format %{ %}
5175   interface(REG_INTER);
5176 %}
5177 
5178 // Double Register
5179 // Double register operands
5180 operand vRegD()
5181 %{
5182   constraint(ALLOC_IN_RC(double_reg));
5183   match(RegD);
5184 
5185   op_cost(0);
5186   format %{ %}
5187   interface(REG_INTER);
5188 %}
5189 










5190 operand vecX()
5191 %{
5192   constraint(ALLOC_IN_RC(vectorx_reg));
5193   match(VecX);
5194 
5195   op_cost(0);
5196   format %{ %}
5197   interface(REG_INTER);
5198 %}
5199 
5200 operand vRegD_V0()
5201 %{
5202   constraint(ALLOC_IN_RC(v0_reg));
5203   match(RegD);
5204   op_cost(0);
5205   format %{ %}
5206   interface(REG_INTER);
5207 %}
5208 
5209 operand vRegD_V1()


13177 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13178 // for this guy.
13179 instruct tlsLoadP(thread_RegP dst)
13180 %{
13181   match(Set dst (ThreadLocal));
13182 
13183   ins_cost(0);
13184 
13185   format %{ " -- \t// $dst=Thread::current(), empty" %}
13186 
13187   size(0);
13188 
13189   ins_encode( /*empty*/ );
13190 
13191   ins_pipe(pipe_class_empty);
13192 %}
13193 
13194 // ====================VECTOR INSTRUCTIONS=====================================
13195 
13196 // Load vector (32 bits)
13197 instruct loadV4(vecX dst, vmem mem)
13198 %{
13199   predicate(n->as_LoadVector()->memory_size() == 4);
13200   match(Set dst (LoadVector mem));
13201   ins_cost(4 * INSN_COST);
13202   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
13203   ins_encode( aarch64_enc_ldrvS(dst, mem) );
13204   ins_pipe(pipe_class_memory);
13205 %}
13206 
13207 // Load vector (64 bits)
13208 instruct loadV8(vecX dst, vmem mem)
13209 %{
13210   predicate(n->as_LoadVector()->memory_size() == 8);
13211   match(Set dst (LoadVector mem));
13212   ins_cost(4 * INSN_COST);
13213   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
13214   ins_encode( aarch64_enc_ldrvD(dst, mem) );
13215   ins_pipe(pipe_class_memory);
13216 %}
13217 
13218 // Load Vector (128 bits)
13219 instruct loadV16(vecX dst, vmem mem)
13220 %{
13221   predicate(n->as_LoadVector()->memory_size() == 16);
13222   match(Set dst (LoadVector mem));
13223   ins_cost(4 * INSN_COST);
13224   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
13225   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
13226   ins_pipe(pipe_class_memory);
13227 %}
13228 
13229 // Store Vector (32 bits)
13230 instruct storeV4(vecX src, vmem mem)
13231 %{
13232   predicate(n->as_StoreVector()->memory_size() == 4);
13233   match(Set mem (StoreVector mem src));
13234   ins_cost(4 * INSN_COST);
13235   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
13236   ins_encode( aarch64_enc_strvS(src, mem) );
13237   ins_pipe(pipe_class_memory);
13238 %}
13239 
13240 // Store Vector (64 bits)
13241 instruct storeV8(vecX src, vmem mem)
13242 %{
13243   predicate(n->as_StoreVector()->memory_size() == 8);
13244   match(Set mem (StoreVector mem src));
13245   ins_cost(4 * INSN_COST);
13246   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
13247   ins_encode( aarch64_enc_strvD(src, mem) );
13248   ins_pipe(pipe_class_memory);
13249 %}
13250 
13251 // Store Vector (128 bits)
13252 instruct storeV16(vecX src, vmem mem)
13253 %{
13254   predicate(n->as_StoreVector()->memory_size() == 16);
13255   match(Set mem (StoreVector mem src));
13256   ins_cost(4 * INSN_COST);
13257   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
13258   ins_encode( aarch64_enc_strvQ(src, mem) );
13259   ins_pipe(pipe_class_memory);
13260 %}
13261 













13262 instruct replicate16B(vecX dst, iRegIorL2I src)
13263 %{

13264   match(Set dst (ReplicateB src));
13265   ins_cost(INSN_COST);
13266   format %{ "dup  $dst, $src\t# vector (16B)" %}
13267   ins_encode %{
13268     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
13269   %}
13270   ins_pipe(pipe_class_default);
13271 %}
13272 













13273 instruct replicate16B_imm(vecX dst, immI con)
13274 %{

13275   match(Set dst (ReplicateB con));
13276   ins_cost(INSN_COST);
13277   format %{ "movi  $dst, $con\t# vector(16B)" %}
13278   ins_encode %{
13279     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
13280   %}
13281   ins_pipe(pipe_class_default);
13282 %}
13283 













13284 instruct replicate8S(vecX dst, iRegIorL2I src)
13285 %{

13286   match(Set dst (ReplicateS src));
13287   ins_cost(INSN_COST);
13288   format %{ "dup  $dst, $src\t# vector (8S)" %}
13289   ins_encode %{
13290     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
13291   %}
13292   ins_pipe(pipe_class_default);
13293 %}
13294 













13295 instruct replicate8S_imm(vecX dst, immI con)
13296 %{

13297   match(Set dst (ReplicateS con));
13298   ins_cost(INSN_COST);
13299   format %{ "movi  $dst, $con\t# vector(8H)" %}
13300   ins_encode %{
13301     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
13302   %}
13303   ins_pipe(pipe_class_default);
13304 %}
13305 












13306 instruct replicate4I(vecX dst, iRegIorL2I src)
13307 %{

13308   match(Set dst (ReplicateI src));
13309   ins_cost(INSN_COST);
13310   format %{ "dup  $dst, $src\t# vector (4I)" %}
13311   ins_encode %{
13312     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
13313   %}
13314   ins_pipe(pipe_class_default);
13315 %}
13316 












13317 instruct replicate4I_imm(vecX dst, immI con)
13318 %{

13319   match(Set dst (ReplicateI con));
13320   ins_cost(INSN_COST);
13321   format %{ "movi  $dst, $con\t# vector(4I)" %}
13322   ins_encode %{
13323     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
13324   %}
13325   ins_pipe(pipe_class_default);
13326 %}
13327 
13328 instruct replicate2L(vecX dst, iRegL src)
13329 %{

13330   match(Set dst (ReplicateL src));
13331   ins_cost(INSN_COST);
13332   format %{ "dup  $dst, $src\t# vector (2L)" %}
13333   ins_encode %{
13334     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
13335   %}
13336   ins_pipe(pipe_class_default);
13337 %}
13338 
13339 instruct replicate2L_zero(vecX dst, immI0 zero)
13340 %{

13341   match(Set dst (ReplicateI zero));
13342   ins_cost(INSN_COST);
13343   format %{ "movi  $dst, $zero\t# vector(4I)" %}
13344   ins_encode %{
13345     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13346            as_FloatRegister($dst$$reg),
13347            as_FloatRegister($dst$$reg));
13348   %}
13349   ins_pipe(pipe_class_default);
13350 %}
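// Editorial note (not part of the patch): the matched $zero operand is never
// materialized here -- eor of a register with itself always yields an all-zero
// vector, so a zero splat costs a single instruction, e.g.:
//   eor v0.16b, v0.16b, v0.16b   // v0 = {0, 0, ..., 0}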
13351 













13352 instruct replicate4F(vecX dst, vRegF src)
13353 %{

13354   match(Set dst (ReplicateF src));
13355   ins_cost(INSN_COST);
13356   format %{ "dup  $dst, $src\t# vector (4F)" %}
13357   ins_encode %{
13358     __ dup(as_FloatRegister($dst$$reg), __ T4S,
13359            as_FloatRegister($src$$reg));
13360   %}
13361   ins_pipe(pipe_class_default);
13362 %}
13363 
13364 instruct replicate2D(vecX dst, vRegD src)
13365 %{

13366   match(Set dst (ReplicateD src));
13367   ins_cost(INSN_COST);
13368   format %{ "dup  $dst, $src\t# vector (2D)" %}
13369   ins_encode %{
13370     __ dup(as_FloatRegister($dst$$reg), __ T2D,
13371            as_FloatRegister($src$$reg));
13372   %}
13373   ins_pipe(pipe_class_default);
13374 %}
13375 
13376 // ====================REDUCTION ARITHMETIC====================================
13377 



















13378 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13379 %{
13380   match(Set dst (AddReductionVI src1 src2));
13381   ins_cost(INSN_COST);
13382   effect(TEMP tmp, TEMP tmp2);
13383   format %{ "addv  $tmp, T4S, $src2\n\t"
13384             "umov  $tmp2, $tmp, S, 0\n\t"
13385             "addw  $dst, $tmp2, $src1\t add reduction4i"
13386   %}
13387   ins_encode %{
13388     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
13389             as_FloatRegister($src2$$reg));
13390     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13391     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
13392   %}
13393   ins_pipe(pipe_class_default);
13394 %}
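// A hedged sketch of the add-reduction sequence above (vtmp/vsrc2/rtmp2/rdst/rsrc1
// are placeholder names, not operands of the rule): addv folds all four S lanes
// into lane 0, umov moves that lane to a general register, addw adds the scalar.
//   __ addv(vtmp, __ T4S, vsrc2);    // vtmp.s[0] = src2[0]+src2[1]+src2[2]+src2[3]
//   __ umov(rtmp2, vtmp, __ S, 0);   // rtmp2 = vtmp.s[0]
//   __ addw(rdst, rtmp2, rsrc1);     // dst = lane sum + src1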
13395 



















13396 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13397 %{
13398   match(Set dst (MulReductionVI src1 src2));
13399   ins_cost(INSN_COST);
13400   effect(TEMP tmp, TEMP tmp2, TEMP dst);
13401   format %{ "ins   $tmp, $src2, 0, 1\n\t"
13402             "mul   $tmp, $tmp, $src2\n\t"
13403             "umov  $tmp2, $tmp, S, 0\n\t"
13404             "mul   $dst, $tmp2, $src1\n\t"
13405             "umov  $tmp2, $tmp, S, 1\n\t"
13406             "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
13407   %}
13408   ins_encode %{
13409     __ ins(as_FloatRegister($tmp$$reg), __ D,
13410            as_FloatRegister($src2$$reg), 0, 1);
13411     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
13412            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
13413     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13414     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
13415     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
13416     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
13417   %}
13418   ins_pipe(pipe_class_default);
13419 %}
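// A hedged note on the lane arithmetic above (editorial, not from the patch):
//   ins  tmp.d[0], src2.d[1]  copies lanes 2..3 of src2 into lanes 0..1 of tmp;
//   mulv tmp.2s, tmp, src2    then gives tmp.s[0] = src2[0]*src2[2] and
//                                        tmp.s[1] = src2[1]*src2[3];
//   the two umov/mul pairs finish with dst = src1*src2[0]*src2[1]*src2[2]*src2[3].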
13420 




















13421 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13422 %{
13423   match(Set dst (AddReductionVF src1 src2));
13424   ins_cost(INSN_COST);
13425   effect(TEMP tmp, TEMP dst);
13426   format %{ "fadds $dst, $src1, $src2\n\t"
13427             "ins   $tmp, S, $src2, 0, 1\n\t"
13428             "fadds $dst, $dst, $tmp\n\t"
13429             "ins   $tmp, S, $src2, 0, 2\n\t"
13430             "fadds $dst, $dst, $tmp\n\t"
13431             "ins   $tmp, S, $src2, 0, 3\n\t"
13432             "fadds $dst, $dst, $tmp\t add reduction4f"
13433   %}
13434   ins_encode %{
13435     __ fadds(as_FloatRegister($dst$$reg),
13436              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13437     __ ins(as_FloatRegister($tmp$$reg), __ S,
13438            as_FloatRegister($src2$$reg), 0, 1);
13439     __ fadds(as_FloatRegister($dst$$reg),
13440              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13441     __ ins(as_FloatRegister($tmp$$reg), __ S,
13442            as_FloatRegister($src2$$reg), 0, 2);
13443     __ fadds(as_FloatRegister($dst$$reg),
13444              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13445     __ ins(as_FloatRegister($tmp$$reg), __ S,
13446            as_FloatRegister($src2$$reg), 0, 3);
13447     __ fadds(as_FloatRegister($dst$$reg),
13448              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13449   %}
13450   ins_pipe(pipe_class_default);
13451 %}
13452 




















13453 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13454 %{
13455   match(Set dst (MulReductionVF src1 src2));
13456   ins_cost(INSN_COST);
13457   effect(TEMP tmp, TEMP dst);
13458   format %{ "fmuls $dst, $src1, $src2\n\t"
13459             "ins   $tmp, S, $src2, 0, 1\n\t"
13460             "fmuls $dst, $dst, $tmp\n\t"
13461             "ins   $tmp, S, $src2, 0, 2\n\t"
13462             "fmuls $dst, $dst, $tmp\n\t"
13463             "ins   $tmp, S, $src2, 0, 3\n\t"
13464             "fmuls $dst, $dst, $tmp\t mul reduction4f"
13465   %}
13466   ins_encode %{
13467     __ fmuls(as_FloatRegister($dst$$reg),
13468              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13469     __ ins(as_FloatRegister($tmp$$reg), __ S,
13470            as_FloatRegister($src2$$reg), 0, 1);
13471     __ fmuls(as_FloatRegister($dst$$reg),
13472              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));


13509   effect(TEMP tmp, TEMP dst);
13510   format %{ "fmuld $dst, $src1, $src2\n\t"
13511             "ins   $tmp, D, $src2, 0, 1\n\t"
13512             "fmuld $dst, $dst, $tmp\t mul reduction2d"
13513   %}
13514   ins_encode %{
13515     __ fmuld(as_FloatRegister($dst$$reg),
13516              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13517     __ ins(as_FloatRegister($tmp$$reg), __ D,
13518            as_FloatRegister($src2$$reg), 0, 1);
13519     __ fmuld(as_FloatRegister($dst$$reg),
13520              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13521   %}
13522   ins_pipe(pipe_class_default);
13523 %}
13524 
13525 // ====================VECTOR ARITHMETIC=======================================
13526 
13527 // --------------------------------- ADD --------------------------------------
13528 















13529 instruct vadd16B(vecX dst, vecX src1, vecX src2)
13530 %{

13531   match(Set dst (AddVB src1 src2));
13532   ins_cost(INSN_COST);
13533   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
13534   ins_encode %{
13535     __ addv(as_FloatRegister($dst$$reg), __ T16B,
13536             as_FloatRegister($src1$$reg),
13537             as_FloatRegister($src2$$reg));
13538   %}
13539   ins_pipe(pipe_class_default);
13540 %}
13541 















13542 instruct vadd8S(vecX dst, vecX src1, vecX src2)
13543 %{

13544   match(Set dst (AddVS src1 src2));
13545   ins_cost(INSN_COST);
13546   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
13547   ins_encode %{
13548     __ addv(as_FloatRegister($dst$$reg), __ T8H,
13549             as_FloatRegister($src1$$reg),
13550             as_FloatRegister($src2$$reg));
13551   %}
13552   ins_pipe(pipe_class_default);
13553 %}
13554 














13555 instruct vadd4I(vecX dst, vecX src1, vecX src2)
13556 %{

13557   match(Set dst (AddVI src1 src2));
13558   ins_cost(INSN_COST);
13559   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
13560   ins_encode %{
13561     __ addv(as_FloatRegister($dst$$reg), __ T4S,
13562             as_FloatRegister($src1$$reg),
13563             as_FloatRegister($src2$$reg));
13564   %}
13565   ins_pipe(pipe_class_default);
13566 %}
13567 
13568 instruct vadd2L(vecX dst, vecX src1, vecX src2)
13569 %{

13570   match(Set dst (AddVL src1 src2));
13571   ins_cost(INSN_COST);
13572   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
13573   ins_encode %{
13574     __ addv(as_FloatRegister($dst$$reg), __ T2D,
13575             as_FloatRegister($src1$$reg),
13576             as_FloatRegister($src2$$reg));
13577   %}
13578   ins_pipe(pipe_class_default);
13579 %}
13580 














13581 instruct vadd4F(vecX dst, vecX src1, vecX src2)
13582 %{

13583   match(Set dst (AddVF src1 src2));
13584   ins_cost(INSN_COST);
13585   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
13586   ins_encode %{
13587     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
13588             as_FloatRegister($src1$$reg),
13589             as_FloatRegister($src2$$reg));
13590   %}
13591   ins_pipe(pipe_class_default);
13592 %}
13593 
13594 instruct vadd2D(vecX dst, vecX src1, vecX src2)
13595 %{
13596   match(Set dst (AddVD src1 src2));
13597   ins_cost(INSN_COST);
13598   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
13599   ins_encode %{
13600     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
13601             as_FloatRegister($src1$$reg),
13602             as_FloatRegister($src2$$reg));
13603   %}
13604   ins_pipe(pipe_class_default);
13605 %}
13606 
13607 // --------------------------------- SUB --------------------------------------
13608 















13609 instruct vsub16B(vecX dst, vecX src1, vecX src2)
13610 %{

13611   match(Set dst (SubVB src1 src2));
13612   ins_cost(INSN_COST);
13613   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
13614   ins_encode %{
13615     __ subv(as_FloatRegister($dst$$reg), __ T16B,
13616             as_FloatRegister($src1$$reg),
13617             as_FloatRegister($src2$$reg));
13618   %}
13619   ins_pipe(pipe_class_default);
13620 %}
13621 















13622 instruct vsub8S(vecX dst, vecX src1, vecX src2)
13623 %{

13624   match(Set dst (SubVS src1 src2));
13625   ins_cost(INSN_COST);
13626   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
13627   ins_encode %{
13628     __ subv(as_FloatRegister($dst$$reg), __ T8H,
13629             as_FloatRegister($src1$$reg),
13630             as_FloatRegister($src2$$reg));
13631   %}
13632   ins_pipe(pipe_class_default);
13633 %}
13634 














13635 instruct vsub4I(vecX dst, vecX src1, vecX src2)
13636 %{

13637   match(Set dst (SubVI src1 src2));
13638   ins_cost(INSN_COST);
13639   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
13640   ins_encode %{
13641     __ subv(as_FloatRegister($dst$$reg), __ T4S,
13642             as_FloatRegister($src1$$reg),
13643             as_FloatRegister($src2$$reg));
13644   %}
13645   ins_pipe(pipe_class_default);
13646 %}
13647 
13648 instruct vsub2L(vecX dst, vecX src1, vecX src2)
13649 %{

13650   match(Set dst (SubVL src1 src2));
13651   ins_cost(INSN_COST);
13652   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
13653   ins_encode %{
13654     __ subv(as_FloatRegister($dst$$reg), __ T2D,
13655             as_FloatRegister($src1$$reg),
13656             as_FloatRegister($src2$$reg));
13657   %}
13658   ins_pipe(pipe_class_default);
13659 %}
13660 














13661 instruct vsub4F(vecX dst, vecX src1, vecX src2)
13662 %{

13663   match(Set dst (SubVF src1 src2));
13664   ins_cost(INSN_COST);
13665   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
13666   ins_encode %{
13667     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
13668             as_FloatRegister($src1$$reg),
13669             as_FloatRegister($src2$$reg));
13670   %}
13671   ins_pipe(pipe_class_default);
13672 %}
13673 
13674 instruct vsub2D(vecX dst, vecX src1, vecX src2)
13675 %{

13676   match(Set dst (SubVD src1 src2));
13677   ins_cost(INSN_COST);
13678   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
13679   ins_encode %{
13680     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
13681             as_FloatRegister($src1$$reg),
13682             as_FloatRegister($src2$$reg));
13683   %}
13684   ins_pipe(pipe_class_default);
13685 %}
13686 
13687 // --------------------------------- MUL --------------------------------------
13688 















13689 instruct vmul8S(vecX dst, vecX src1, vecX src2)
13690 %{

13691   match(Set dst (MulVS src1 src2));
13692   ins_cost(INSN_COST);
13693   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
13694   ins_encode %{
13695     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
13696             as_FloatRegister($src1$$reg),
13697             as_FloatRegister($src2$$reg));
13698   %}
13699   ins_pipe(pipe_class_default);
13700 %}
13701 














13702 instruct vmul4I(vecX dst, vecX src1, vecX src2)
13703 %{

13704   match(Set dst (MulVI src1 src2));
13705   ins_cost(INSN_COST);
13706   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
13707   ins_encode %{
13708     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
13709             as_FloatRegister($src1$$reg),
13710             as_FloatRegister($src2$$reg));
13711   %}
13712   ins_pipe(pipe_class_default);
13713 %}
13714 














13715 instruct vmul4F(vecX dst, vecX src1, vecX src2)
13716 %{

13717   match(Set dst (MulVF src1 src2));
13718   ins_cost(INSN_COST);
13719   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
13720   ins_encode %{
13721     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
13722             as_FloatRegister($src1$$reg),
13723             as_FloatRegister($src2$$reg));
13724   %}
13725   ins_pipe(pipe_class_default);
13726 %}
13727 
13728 instruct vmul2D(vecX dst, vecX src1, vecX src2)
13729 %{

13730   match(Set dst (MulVD src1 src2));
13731   ins_cost(INSN_COST);
13732   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
13733   ins_encode %{
13734     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
13735             as_FloatRegister($src1$$reg),
13736             as_FloatRegister($src2$$reg));
13737   %}
13738   ins_pipe(pipe_class_default);
13739 %}
13740 
13741 // --------------------------------- DIV --------------------------------------
13742 














13743 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
13744 %{

13745   match(Set dst (DivVF src1 src2));
13746   ins_cost(INSN_COST);
13747   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
13748   ins_encode %{
13749     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
13750             as_FloatRegister($src1$$reg),
13751             as_FloatRegister($src2$$reg));
13752   %}
13753   ins_pipe(pipe_class_default);
13754 %}
13755 
13756 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
13757 %{

13758   match(Set dst (DivVD src1 src2));
13759   ins_cost(INSN_COST);
13760   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
13761   ins_encode %{
13762     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
13763             as_FloatRegister($src1$$reg),
13764             as_FloatRegister($src2$$reg));
13765   %}
13766   ins_pipe(pipe_class_default);
13767 %}
13768 
13769 // --------------------------------- AND --------------------------------------
13770 















13771 instruct vand16B(vecX dst, vecX src1, vecX src2)
13772 %{

13773   match(Set dst (AndV src1 src2));
13774   ins_cost(INSN_COST);
13775   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
13776   ins_encode %{
13777     __ andr(as_FloatRegister($dst$$reg), __ T16B,
13778             as_FloatRegister($src1$$reg),
13779             as_FloatRegister($src2$$reg));
13780   %}
13781   ins_pipe(pipe_class_default);
13782 %}
13783 
13784 // --------------------------------- OR ---------------------------------------
13785 















13786 instruct vor16B(vecX dst, vecX src1, vecX src2)
13787 %{

13788   match(Set dst (OrV src1 src2));
13789   ins_cost(INSN_COST);
13790   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
13791   ins_encode %{
13792     __ orr(as_FloatRegister($dst$$reg), __ T16B,
13793             as_FloatRegister($src1$$reg),
13794             as_FloatRegister($src2$$reg));
13795   %}
13796   ins_pipe(pipe_class_default);
13797 %}
13798 
13799 // --------------------------------- XOR --------------------------------------
13800 















13801 instruct vxor16B(vecX dst, vecX src1, vecX src2)
13802 %{

13803   match(Set dst (XorV src1 src2));
13804   ins_cost(INSN_COST);
13805   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
13806   ins_encode %{
13807     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13808             as_FloatRegister($src1$$reg),
13809             as_FloatRegister($src2$$reg));
13810   %}
13811   ins_pipe(pipe_class_default);
13812 %}
13813 
13814 // ------------------------------ Shift ---------------------------------------
13815 
13816 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
13817   match(Set dst (LShiftCntV cnt));
13818   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
13819   ins_encode %{
13820     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13821   %}
13822   ins_pipe(pipe_class_default);
13823 %}
13824 
13825 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
13826 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
13827   match(Set dst (RShiftCntV cnt));
13828   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
13829   ins_encode %{
13830     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13831     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
13832   %}
13833   ins_pipe(pipe_class_default);
13834 %}
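// Editorial sketch of the negative-count trick noted above (vtmp/vdst/vsrc/rcnt are
// placeholder names): sshl/ushl shift each lane by a per-lane *signed* amount, so a
// variable right shift is a broadcast of the count followed by a negate, e.g.:
//   __ dup(vtmp, __ T16B, rcnt);        // every byte lane = cnt (say 3)
//   __ negr(vtmp, __ T16B, vtmp);       // every byte lane = -cnt (-3)
//   __ sshl(vdst, __ T4S, vsrc, vtmp);  // each S lane shifted right by cnt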
13835 















13836 instruct vsll16B(vecX dst, vecX src, vecX shift) %{

13837   match(Set dst (LShiftVB src shift));
13838   match(Set dst (RShiftVB src shift));
13839   ins_cost(INSN_COST);
13840   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
13841   ins_encode %{
13842     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
13843             as_FloatRegister($src$$reg),
13844             as_FloatRegister($shift$$reg));
13845   %}
13846   ins_pipe(pipe_class_default);
13847 %}
13848 














13849 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{

13850   match(Set dst (URShiftVB src shift));
13851   ins_cost(INSN_COST);
13852   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
13853   ins_encode %{
13854     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
13855             as_FloatRegister($src$$reg),
13856             as_FloatRegister($shift$$reg));
13857   %}
13858   ins_pipe(pipe_class_default);
13859 %}
13860 




















13861 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{

13862   match(Set dst (LShiftVB src shift));
13863   ins_cost(INSN_COST);
13864   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
13865   ins_encode %{
13866     int sh = (int)$shift$$constant & 31;
13867     if (sh >= 8) {
13868       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13869              as_FloatRegister($src$$reg),
13870              as_FloatRegister($src$$reg));
13871     } else {
13872       __ shl(as_FloatRegister($dst$$reg), __ T16B,
13873              as_FloatRegister($src$$reg), sh);
13874     }
13875   %}
13876   ins_pipe(pipe_class_default);
13877 %}
13878 
















13879 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{

13880   match(Set dst (RShiftVB src shift));
13881   ins_cost(INSN_COST);
13882   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
13883   ins_encode %{
13884     int sh = (int)$shift$$constant & 31;
13885     if (sh >= 8) sh = 7;
13886     sh = -sh & 7;
13887     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
13888            as_FloatRegister($src$$reg), sh);
13889   %}
13890   ins_pipe(pipe_class_default);
13891 %}
13892 




















13893 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{

13894   match(Set dst (URShiftVB src shift));
13895   ins_cost(INSN_COST);
13896   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
13897   ins_encode %{
13898     int sh = (int)$shift$$constant & 31;
13899     if (sh >= 8) {
13900       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13901              as_FloatRegister($src$$reg),
13902              as_FloatRegister($src$$reg));
13903     } else {
13904       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
13905              as_FloatRegister($src$$reg), -sh & 7);
13906     }
13907   %}
13908   ins_pipe(pipe_class_default);
13909 %}
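// Worked example of the count handling above (editorial): for 8-bit lanes an
// immediate count of 8 or more must give 0 for the logical forms (hence the eor
// that zeroes the destination) but must keep replicating the sign bit for the
// arithmetic form (hence the clamp to 7). For a lane holding 0x80:
//   shl/ushr by 9  ->  0x00
//   sshr by 9      ->  clamped to 7  ->  0xff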
13910 















13911 instruct vsll8S(vecX dst, vecX src, vecX shift) %{

13912   match(Set dst (LShiftVS src shift));
13913   match(Set dst (RShiftVS src shift));
13914   ins_cost(INSN_COST);
13915   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
13916   ins_encode %{
13917     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
13918             as_FloatRegister($src$$reg),
13919             as_FloatRegister($shift$$reg));
13920   %}
13921   ins_pipe(pipe_class_default);
13922 %}
13923 














13924 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{

13925   match(Set dst (URShiftVS src shift));
13926   ins_cost(INSN_COST);
13927   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
13928   ins_encode %{
13929     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
13930             as_FloatRegister($src$$reg),
13931             as_FloatRegister($shift$$reg));
13932   %}
13933   ins_pipe(pipe_class_default);
13934 %}
13935 




















13936 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{

13937   match(Set dst (LShiftVS src shift));
13938   ins_cost(INSN_COST);
13939   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
13940   ins_encode %{
13941     int sh = (int)$shift$$constant & 31;
13942     if (sh >= 16) {
13943       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13944              as_FloatRegister($src$$reg),
13945              as_FloatRegister($src$$reg));
13946     } else {
13947       __ shl(as_FloatRegister($dst$$reg), __ T8H,
13948              as_FloatRegister($src$$reg), sh);
13949     }
13950   %}
13951   ins_pipe(pipe_class_default);
13952 %}
13953 
















13954 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{

13955   match(Set dst (RShiftVS src shift));
13956   ins_cost(INSN_COST);
13957   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
13958   ins_encode %{
13959     int sh = (int)$shift$$constant & 31;
13960     if (sh >= 16) sh = 15;
13961     sh = -sh & 15;
13962     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
13963            as_FloatRegister($src$$reg), sh);
13964   %}
13965   ins_pipe(pipe_class_default);
13966 %}
13967 




















13968 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{

13969   match(Set dst (URShiftVS src shift));
13970   ins_cost(INSN_COST);
13971   format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
13972   ins_encode %{
13973     int sh = (int)$shift$$constant & 31;
13974     if (sh >= 16) {
13975       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13976              as_FloatRegister($src$$reg),
13977              as_FloatRegister($src$$reg));
13978     } else {
13979       __ ushr(as_FloatRegister($dst$$reg), __ T8H,
13980              as_FloatRegister($src$$reg), -sh & 15);
13981     }
13982   %}
13983   ins_pipe(pipe_class_default);
13984 %}
13985 














13986 instruct vsll4I(vecX dst, vecX src, vecX shift) %{

13987   match(Set dst (LShiftVI src shift));
13988   match(Set dst (RShiftVI src shift));
13989   ins_cost(INSN_COST);
13990   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
13991   ins_encode %{
13992     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
13993             as_FloatRegister($src$$reg),
13994             as_FloatRegister($shift$$reg));
13995   %}
13996   ins_pipe(pipe_class_default);
13997 %}
13998 













13999 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{

14000   match(Set dst (URShiftVI src shift));
14001   ins_cost(INSN_COST);
14002   format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
14003   ins_encode %{
14004     __ ushl(as_FloatRegister($dst$$reg), __ T4S,
14005             as_FloatRegister($src$$reg),
14006             as_FloatRegister($shift$$reg));
14007   %}
14008   ins_pipe(pipe_class_default);
14009 %}
14010 













14011 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{

14012   match(Set dst (LShiftVI src shift));
14013   ins_cost(INSN_COST);
14014   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
14015   ins_encode %{
14016     __ shl(as_FloatRegister($dst$$reg), __ T4S,
14017            as_FloatRegister($src$$reg),
14018            (int)$shift$$constant & 31);
14019   %}
14020   ins_pipe(pipe_class_default);
14021 %}
14022 













14023 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{

14024   match(Set dst (RShiftVI src shift));
14025   ins_cost(INSN_COST);
14026   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
14027   ins_encode %{
14028     __ sshr(as_FloatRegister($dst$$reg), __ T4S,
14029             as_FloatRegister($src$$reg),
14030             -(int)$shift$$constant & 31);
14031   %}
14032   ins_pipe(pipe_class_default);
14033 %}
14034 













14035 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{

14036   match(Set dst (URShiftVI src shift));
14037   ins_cost(INSN_COST);
14038   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
14039   ins_encode %{
14040     __ ushr(as_FloatRegister($dst$$reg), __ T4S,
14041             as_FloatRegister($src$$reg),
14042             -(int)$shift$$constant & 31);
14043   %}
14044   ins_pipe(pipe_class_default);
14045 %}
14046 
14047 instruct vsll2L(vecX dst, vecX src, vecX shift) %{

14048   match(Set dst (LShiftVL src shift));
14049   match(Set dst (RShiftVL src shift));
14050   ins_cost(INSN_COST);
14051   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
14052   ins_encode %{
14053     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
14054             as_FloatRegister($src$$reg),
14055             as_FloatRegister($shift$$reg));
14056   %}
14057   ins_pipe(pipe_class_default);
14058 %}
14059 
14060 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{

14061   match(Set dst (URShiftVL src shift));
14062   ins_cost(INSN_COST);
14063   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
14064   ins_encode %{
14065     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
14066             as_FloatRegister($src$$reg),
14067             as_FloatRegister($shift$$reg));
14068   %}
14069   ins_pipe(pipe_class_default);
14070 %}
14071 
14072 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{

14073   match(Set dst (LShiftVL src shift));
14074   ins_cost(INSN_COST);
14075   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
14076   ins_encode %{
14077     __ shl(as_FloatRegister($dst$$reg), __ T2D,
14078            as_FloatRegister($src$$reg),
14079            (int)$shift$$constant & 63);
14080   %}
14081   ins_pipe(pipe_class_default);
14082 %}
14083 
14084 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{

14085   match(Set dst (RShiftVL src shift));
14086   ins_cost(INSN_COST);
14087   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
14088   ins_encode %{
14089     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
14090             as_FloatRegister($src$$reg),
14091             -(int)$shift$$constant & 63);
14092   %}
14093   ins_pipe(pipe_class_default);
14094 %}
14095 
14096 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{

14097   match(Set dst (URShiftVL src shift));
14098   ins_cost(INSN_COST);
14099   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
14100   ins_encode %{
14101     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
14102             as_FloatRegister($src$$reg),
14103             -(int)$shift$$constant & 63);
14104   %}
14105   ins_pipe(pipe_class_default);
14106 %}
14107 
14108 //----------PEEPHOLE RULES-----------------------------------------------------
14109 // These must follow all instruction definitions as they use the names
14110 // defined in the instructions definitions.
14111 //
14112 // peepmatch ( root_instr_name [preceding_instruction]* );
14113 //
14114 // peepconstraint %{
14115 // (instruction_number.operand_name relational_op instruction_number.operand_name
14116 //  [, ...] );




 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,


2152 
2153   enum RC src_hi_rc = rc_class(src_hi);
2154   enum RC src_lo_rc = rc_class(src_lo);
2155   enum RC dst_hi_rc = rc_class(dst_hi);
2156   enum RC dst_lo_rc = rc_class(dst_lo);
2157 
2158   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2159 
2160   if (src_hi != OptoReg::Bad) {
2161     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2162            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2163            "expected aligned-adjacent pairs");
2164   }
2165 
2166   if (src_lo == dst_lo && src_hi == dst_hi) {
2167     return 0;            // Self copy, no move.
2168   }
2169 
2170   if (bottom_type()->isa_vect() != NULL) {
2171     uint len = 4;
2172     uint ireg = ideal_reg();
2173     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2174     if (cbuf) {
2175       MacroAssembler _masm(cbuf);

2176       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");

2177       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2178         // stack->stack
2179         int src_offset = ra_->reg2offset(src_lo);
2180         int dst_offset = ra_->reg2offset(dst_lo);
2181         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
2182         len = 8;
2183         if (ireg == Op_VecD) {
2184           __ ldr(rscratch1, Address(sp, src_offset));
2185           __ str(rscratch1, Address(sp, dst_offset));
2186         } else {
2187           if (src_offset < 512) {
2188             __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2189           } else {
2190             __ ldr(rscratch1, Address(sp, src_offset));
2191             __ ldr(rscratch2, Address(sp, src_offset+8));
2192             len += 4;
2193           }
2194           if (dst_offset < 512) {
2195             __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2196           } else {
2197             __ str(rscratch1, Address(sp, dst_offset));
2198             __ str(rscratch2, Address(sp, dst_offset+8));
2199             len += 4;
2200           }
2201         }
2202       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2203         __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2204                ireg == Op_VecD ? __ T8B : __ T16B,
2205                as_FloatRegister(Matcher::_regEncode[src_lo]),
2206                as_FloatRegister(Matcher::_regEncode[src_lo]));
2207       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2208         __ str(as_FloatRegister(Matcher::_regEncode[src_lo]),
2209                ireg == Op_VecD ? __ D : __ Q,
2210                Address(sp, ra_->reg2offset(dst_lo)));
2211       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2212         __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2213                ireg == Op_VecD ? __ D : __ Q,
2214                Address(sp, ra_->reg2offset(src_lo)));
2215       } else {
2216         ShouldNotReachHere();
2217       }
2218     } else if (st) {
2219       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2220         // stack->stack
2221         int src_offset = ra_->reg2offset(src_lo);
2222         int dst_offset = ra_->reg2offset(dst_lo);
2223         if (ireg == Op_VecD) {
2224           st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2225           st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2226         } else {
2227           if (src_offset < 512) {
2228             st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
2229           } else {
2230             st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2231             st->print("\nldr  rscratch2, [sp, #%d]", src_offset+8);
2232           }
2233           if (dst_offset < 512) {
2234             st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
2235           } else {
2236             st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2237             st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+8);
2238           }
2239         }
2240         st->print("\t# vector spill, stack to stack");
2241       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2242         st->print("mov  %s, %s\t# vector spill, reg to reg",
2243                    Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2244       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2245         st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
2246                    Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2247       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2248         st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
2249                    Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2250       }
2251     }
2252     return len;
2253   }
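      // Editorial note (assumption, not from the patch): 64-bit ldp/stp encode a
      // signed 7-bit immediate scaled by 8, i.e. offsets in [-512, 504], which the
      // "offset < 512" checks above approximate; larger offsets fall back to two
      // single ldr/str, and len grows by 4 bytes for each extra instruction, e.g.:
      //   offset 496  ->  one ldp / one stp
      //   offset 520  ->  out of imm7 range  ->  two ldr + two str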
2254 
2255   switch (src_lo_rc) {
2256   case rc_int:
2257     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2258       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2259           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {


2670 // true just means we have fast l2f conversion
2671 const bool Matcher::convL2FSupported(void) {
2672   return true;
2673 }
2674 
2675 // Vector width in bytes.
2676 const int Matcher::vector_width_in_bytes(BasicType bt) {
2677   int size = MIN2(16,(int)MaxVectorSize);
2678   // Minimum 2 values in vector
2679   if (size < 2*type2aelembytes(bt)) size = 0;
2680   // But never < 4
2681   if (size < 4) size = 0;
2682   return size;
2683 }
2684 
2685 // Limits on vector size (number of elements) loaded into vector.
2686 const int Matcher::max_vector_size(const BasicType bt) {
2687   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2688 }
2689 const int Matcher::min_vector_size(const BasicType bt) {
2690   return (type2aelembytes(bt) == 1) ? 4 : 2;


2691 }
2692 
2693 // Vector ideal reg.
2694 const int Matcher::vector_ideal_reg(int len) {
2695   switch(len) {
2696     case  4:
2697     case  8: return Op_VecD;
2698     case 16: return Op_VecX;
2699   }
2700   ShouldNotReachHere();
2701   return 0;
2702 }
2703 
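// Illustrative summary of the sizing hooks above (editorial; assumes MaxVectorSize
// is at least 16, so vector_width_in_bytes returns 16 for every element type):
//   element type      max_vector_size   min_vector_size
//   T_BYTE                  16                 4
//   T_SHORT                  8                 2
//   T_INT / T_FLOAT          4                 2
//   T_LONG / T_DOUBLE        2                 2
// vector_ideal_reg then maps a 4- or 8-byte vector to Op_VecD and a 16-byte vector
// to Op_VecX, matching the 64-bit (vecD) and 128-bit (vecX) operands in this change.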

2704 const int Matcher::vector_shift_count_ideal_reg(int size) {
2705   return Op_VecX;
2706 }
2707 
2708 // AES support not yet implemented
2709 const bool Matcher::pass_original_key_for_aes() {
2710   return false;
2711 }
2712 
2713 // x86 supports misaligned vectors store/load.
2714 const bool Matcher::misaligned_vectors_ok() {
2715   return !AlignVector; // can be changed by flag


2716 }
2717 
2718 // false => size gets scaled to BytesPerLong, ok.
2719 const bool Matcher::init_array_count_is_in_bytes = false;
2720 
2721 // Threshold size for cleararray.
2722 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2723 
2724 // Use conditional move (CMOVL)
2725 const int Matcher::long_cmove_cost() {
2726   // long cmoves are no more expensive than int cmoves
2727   return 0;
2728 }
2729 
2730 const int Matcher::float_cmove_cost() {
2731   // float cmoves are no more expensive than int cmoves
2732   return 0;
2733 }
2734 
2735 // Does the CPU require late expand (see block.cpp for description of late expand)?


3106   %}
3107 
3108   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3109     Register dst_reg = as_Register($dst$$reg);
3110     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3111                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3112   %}
3113 
3114   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3115     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3116     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3117                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3118   %}
3119 
3120   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3121     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3122     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3123                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3124   %}
3125 
3126   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
3127     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3128     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3129        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3130   %}
3131 
3132   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
3133     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3134     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3135        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3136   %}
3137 
3138   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3139     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3140     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3141        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3142   %}
3143 
3144   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3145     Register src_reg = as_Register($src$$reg);
3146     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3147                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3148   %}
3149 
3150   enc_class aarch64_enc_strb0(memory mem) %{
3151     MacroAssembler _masm(&cbuf);
3152     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),


3192   %}
3193 
3194   enc_class aarch64_enc_str0(memory mem) %{
3195     MacroAssembler _masm(&cbuf);
3196     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
3197                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3198   %}
3199 
3200   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
3201     FloatRegister src_reg = as_FloatRegister($src$$reg);
3202     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
3203                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3204   %}
3205 
3206   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
3207     FloatRegister src_reg = as_FloatRegister($src$$reg);
3208     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
3209                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3210   %}
3211 
3212   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
3213     FloatRegister src_reg = as_FloatRegister($src$$reg);
3214     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
3215        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3216   %}
3217 
3218   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
3219     FloatRegister src_reg = as_FloatRegister($src$$reg);
3220     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
3221        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3222   %}
3223 
3224   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
3225     FloatRegister src_reg = as_FloatRegister($src$$reg);
3226     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
3227        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3228   %}
3229 
3230   // END Non-volatile memory access
3231 
3232   // volatile loads and stores
3233 
3234   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
3235     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3236                  rscratch1, stlrb);
3237   %}
3238 


5220   constraint(ALLOC_IN_RC(float_reg));
5221   match(RegF);
5222 
5223   op_cost(0);
5224   format %{ %}
5225   interface(REG_INTER);
5226 %}
5227 
5228 // Double Register
5229 // Double register operands
5230 operand vRegD()
5231 %{
5232   constraint(ALLOC_IN_RC(double_reg));
5233   match(RegD);
5234 
5235   op_cost(0);
5236   format %{ %}
5237   interface(REG_INTER);
5238 %}
5239 
5240 operand vecD()
5241 %{
5242   constraint(ALLOC_IN_RC(vectord_reg));
5243   match(VecD);
5244 
5245   op_cost(0);
5246   format %{ %}
5247   interface(REG_INTER);
5248 %}
5249 
5250 operand vecX()
5251 %{
5252   constraint(ALLOC_IN_RC(vectorx_reg));
5253   match(VecX);
5254 
5255   op_cost(0);
5256   format %{ %}
5257   interface(REG_INTER);
5258 %}
5259 
5260 operand vRegD_V0()
5261 %{
5262   constraint(ALLOC_IN_RC(v0_reg));
5263   match(RegD);
5264   op_cost(0);
5265   format %{ %}
5266   interface(REG_INTER);
5267 %}
5268 
5269 operand vRegD_V1()


13237 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13238 // for this guy.
13239 instruct tlsLoadP(thread_RegP dst)
13240 %{
13241   match(Set dst (ThreadLocal));
13242 
13243   ins_cost(0);
13244 
13245   format %{ " -- \t// $dst=Thread::current(), empty" %}
13246 
13247   size(0);
13248 
13249   ins_encode( /*empty*/ );
13250 
13251   ins_pipe(pipe_class_empty);
13252 %}
13253 
13254 // ====================VECTOR INSTRUCTIONS=====================================
13255 
13256 // Load Vector (32 bits)
13257 instruct loadV4(vecD dst, vmem mem)
13258 %{
13259   predicate(n->as_LoadVector()->memory_size() == 4);
13260   match(Set dst (LoadVector mem));
13261   ins_cost(4 * INSN_COST);
13262   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
13263   ins_encode( aarch64_enc_ldrvS(dst, mem) );
13264   ins_pipe(pipe_class_memory);
13265 %}
13266 
13267 // Load Vector (64 bits)
13268 instruct loadV8(vecD dst, vmem mem)
13269 %{
13270   predicate(n->as_LoadVector()->memory_size() == 8);
13271   match(Set dst (LoadVector mem));
13272   ins_cost(4 * INSN_COST);
13273   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
13274   ins_encode( aarch64_enc_ldrvD(dst, mem) );
13275   ins_pipe(pipe_class_memory);
13276 %}
13277 
13278 // Load Vector (128 bits)
13279 instruct loadV16(vecX dst, vmem mem)
13280 %{
13281   predicate(n->as_LoadVector()->memory_size() == 16);
13282   match(Set dst (LoadVector mem));
13283   ins_cost(4 * INSN_COST);
13284   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
13285   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
13286   ins_pipe(pipe_class_memory);
13287 %}
13288 
13289 // Store Vector (32 bits)
13290 instruct storeV4(vecD src, vmem mem)
13291 %{
13292   predicate(n->as_StoreVector()->memory_size() == 4);
13293   match(Set mem (StoreVector mem src));
13294   ins_cost(4 * INSN_COST);
13295   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
13296   ins_encode( aarch64_enc_strvS(src, mem) );
13297   ins_pipe(pipe_class_memory);
13298 %}
13299 
13300 // Store Vector (64 bits)
13301 instruct storeV8(vecD src, vmem mem)
13302 %{
13303   predicate(n->as_StoreVector()->memory_size() == 8);
13304   match(Set mem (StoreVector mem src));
13305   ins_cost(4 * INSN_COST);
13306   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
13307   ins_encode( aarch64_enc_strvD(src, mem) );
13308   ins_pipe(pipe_class_memory);
13309 %}
13310 
13311 // Store Vector (128 bits)
13312 instruct storeV16(vecX src, vmem mem)
13313 %{
13314   predicate(n->as_StoreVector()->memory_size() == 16);
13315   match(Set mem (StoreVector mem src));
13316   ins_cost(4 * INSN_COST);
13317   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
13318   ins_encode( aarch64_enc_strvQ(src, mem) );
13319   ins_pipe(pipe_class_memory);
13320 %}
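
A quick illustrative summary of how these six rules divide the work: the memory_size() predicate selects the S, D or Q access and the operand class has to agree with it. The sketch below is editorial C++ (the struct and function names are invented; only the rule and mnemonic names come from this section):

#include <cstdio>

// Mapping from the vector's size in bytes to the rule that matches it and
// the access width the encoding emits.
struct VMemRule { int bytes; const char* load; const char* store; const char* opnd; };

static const VMemRule rules[] = {
  {  4, "loadV4/ldrs",  "storeV4/strs",  "vecD" },   // 32-bit vector
  {  8, "loadV8/ldrd",  "storeV8/strd",  "vecD" },   // 64-bit vector
  { 16, "loadV16/ldrq", "storeV16/strq", "vecX" },   // 128-bit vector
};

int main() {
  for (const VMemRule& r : rules)
    std::printf("%2d bytes -> %-13s %-14s operand %s\n",
                r.bytes, r.load, r.store, r.opnd);
  return 0;
}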
13321 
13322 instruct replicate8B(vecD dst, iRegIorL2I src)
13323 %{
13324   predicate(n->as_Vector()->length() == 4 ||
13325             n->as_Vector()->length() == 8);
13326   match(Set dst (ReplicateB src));
13327   ins_cost(INSN_COST);
13328   format %{ "dup  $dst, $src\t# vector (8B)" %}
13329   ins_encode %{
13330     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
13331   %}
13332   ins_pipe(pipe_class_default);
13333 %}
13334 
13335 instruct replicate16B(vecX dst, iRegIorL2I src)
13336 %{
13337   predicate(n->as_Vector()->length() == 16);
13338   match(Set dst (ReplicateB src));
13339   ins_cost(INSN_COST);
13340   format %{ "dup  $dst, $src\t# vector (16B)" %}
13341   ins_encode %{
13342     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
13343   %}
13344   ins_pipe(pipe_class_default);
13345 %}
13346 
13347 instruct replicate8B_imm(vecD dst, immI con)
13348 %{
13349   predicate(n->as_Vector()->length() == 4 ||
13350             n->as_Vector()->length() == 8);
13351   match(Set dst (ReplicateB con));
13352   ins_cost(INSN_COST);
13353   format %{ "movi  $dst, $con\t# vector(8B)" %}
13354   ins_encode %{
13355     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
13356   %}
13357   ins_pipe(pipe_class_default);
13358 %}
13359 
13360 instruct replicate16B_imm(vecX dst, immI con)
13361 %{
13362   predicate(n->as_Vector()->length() == 16);
13363   match(Set dst (ReplicateB con));
13364   ins_cost(INSN_COST);
13365   format %{ "movi  $dst, $con\t# vector(16B)" %}
13366   ins_encode %{
13367     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
13368   %}
13369   ins_pipe(pipe_class_default);
13370 %}
13371 
13372 instruct replicate4S(vecD dst, iRegIorL2I src)
13373 %{
13374   predicate(n->as_Vector()->length() == 2 ||
13375             n->as_Vector()->length() == 4);
13376   match(Set dst (ReplicateS src));
13377   ins_cost(INSN_COST);
13378   format %{ "dup  $dst, $src\t# vector (4S)" %}
13379   ins_encode %{
13380     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
13381   %}
13382   ins_pipe(pipe_class_default);
13383 %}
13384 
13385 instruct replicate8S(vecX dst, iRegIorL2I src)
13386 %{
13387   predicate(n->as_Vector()->length() == 8);
13388   match(Set dst (ReplicateS src));
13389   ins_cost(INSN_COST);
13390   format %{ "dup  $dst, $src\t# vector (8S)" %}
13391   ins_encode %{
13392     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
13393   %}
13394   ins_pipe(pipe_class_default);
13395 %}
13396 
13397 instruct replicate4S_imm(vecD dst, immI con)
13398 %{
13399   predicate(n->as_Vector()->length() == 2 ||
13400             n->as_Vector()->length() == 4);
13401   match(Set dst (ReplicateS con));
13402   ins_cost(INSN_COST);
13403   format %{ "movi  $dst, $con\t# vector(4H)" %}
13404   ins_encode %{
13405     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
13406   %}
13407   ins_pipe(pipe_class_default);
13408 %}
13409 
13410 instruct replicate8S_imm(vecX dst, immI con)
13411 %{
13412   predicate(n->as_Vector()->length() == 8);
13413   match(Set dst (ReplicateS con));
13414   ins_cost(INSN_COST);
13415   format %{ "movi  $dst, $con\t# vector(8H)" %}
13416   ins_encode %{
13417     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
13418   %}
13419   ins_pipe(pipe_class_default);
13420 %}
13421 
13422 instruct replicate2I(vecD dst, iRegIorL2I src)
13423 %{
13424   predicate(n->as_Vector()->length() == 2);
13425   match(Set dst (ReplicateI src));
13426   ins_cost(INSN_COST);
13427   format %{ "dup  $dst, $src\t# vector (2I)" %}
13428   ins_encode %{
13429     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
13430   %}
13431   ins_pipe(pipe_class_default);
13432 %}
13433 
13434 instruct replicate4I(vecX dst, iRegIorL2I src)
13435 %{
13436   predicate(n->as_Vector()->length() == 4);
13437   match(Set dst (ReplicateI src));
13438   ins_cost(INSN_COST);
13439   format %{ "dup  $dst, $src\t# vector (4I)" %}
13440   ins_encode %{
13441     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
13442   %}
13443   ins_pipe(pipe_class_default);
13444 %}
13445 
13446 instruct replicate2I_imm(vecD dst, immI con)
13447 %{
13448   predicate(n->as_Vector()->length() == 2);
13449   match(Set dst (ReplicateI con));
13450   ins_cost(INSN_COST);
13451   format %{ "movi  $dst, $con\t# vector(2I)" %}
13452   ins_encode %{
13453     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
13454   %}
13455   ins_pipe(pipe_class_default);
13456 %}
13457 
13458 instruct replicate4I_imm(vecX dst, immI con)
13459 %{
13460   predicate(n->as_Vector()->length() == 4);
13461   match(Set dst (ReplicateI con));
13462   ins_cost(INSN_COST);
13463   format %{ "movi  $dst, $con\t# vector(4I)" %}
13464   ins_encode %{
13465     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
13466   %}
13467   ins_pipe(pipe_class_default);
13468 %}
13469 
13470 instruct replicate2L(vecX dst, iRegL src)
13471 %{
13472   predicate(n->as_Vector()->length() == 2);
13473   match(Set dst (ReplicateL src));
13474   ins_cost(INSN_COST);
13475   format %{ "dup  $dst, $src\t# vector (2L)" %}
13476   ins_encode %{
13477     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
13478   %}
13479   ins_pipe(pipe_class_default);
13480 %}
13481 
13482 instruct replicate2L_zero(vecX dst, immI0 zero)
13483 %{
13484   predicate(n->as_Vector()->length() == 2);
13485   match(Set dst (ReplicateI zero));
13486   ins_cost(INSN_COST);
13487   format %{ "movi  $dst, $zero\t# vector(4I)" %}
13488   ins_encode %{
13489     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13490            as_FloatRegister($dst$$reg),
13491            as_FloatRegister($dst$$reg));
13492   %}
13493   ins_pipe(pipe_class_default);
13494 %}
13495 
13496 instruct replicate2F(vecD dst, vRegF src)
13497 %{
13498   predicate(n->as_Vector()->length() == 2);
13499   match(Set dst (ReplicateF src));
13500   ins_cost(INSN_COST);
13501   format %{ "dup  $dst, $src\t# vector (2F)" %}
13502   ins_encode %{
13503     __ dup(as_FloatRegister($dst$$reg), __ T2S,
13504            as_FloatRegister($src$$reg));
13505   %}
13506   ins_pipe(pipe_class_default);
13507 %}
13508 
13509 instruct replicate4F(vecX dst, vRegF src)
13510 %{
13511   predicate(n->as_Vector()->length() == 4);
13512   match(Set dst (ReplicateF src));
13513   ins_cost(INSN_COST);
13514   format %{ "dup  $dst, $src\t# vector (4F)" %}
13515   ins_encode %{
13516     __ dup(as_FloatRegister($dst$$reg), __ T4S,
13517            as_FloatRegister($src$$reg));
13518   %}
13519   ins_pipe(pipe_class_default);
13520 %}
13521 
13522 instruct replicate2D(vecX dst, vRegD src)
13523 %{
13524   predicate(n->as_Vector()->length() == 2);
13525   match(Set dst (ReplicateD src));
13526   ins_cost(INSN_COST);
13527   format %{ "dup  $dst, $src\t# vector (2D)" %}
13528   ins_encode %{
13529     __ dup(as_FloatRegister($dst$$reg), __ T2D,
13530            as_FloatRegister($src$$reg));
13531   %}
13532   ins_pipe(pipe_class_default);
13533 %}
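
All of the Replicate rules above splat one scalar into every lane; only the arrangement (T8B, T4H, T2S, ...) and the destination class (vecD vs vecX) change. A scalar C++ model of that behaviour, as an editorial sketch with invented names and lane counts chosen purely for illustration:

#include <cstdint>
#include <cstdio>

// Splat src into all N lanes of dst -- the scalar meaning of dup.
template <typename Lane, int N>
void dup(Lane (&dst)[N], Lane src) {
  for (int i = 0; i < N; i++) dst[i] = src;
}

int main() {
  int8_t  b8[8]; dup(b8, (int8_t)0x7f);       // like replicate8B  (T8B,  64-bit vecD)
  int16_t h8[8]; dup(h8, (int16_t)42);        // like replicate8S  (T8H, 128-bit vecX)
  int32_t s2[2]; dup(s2, (int32_t)12345);     // like replicate2I  (T2S,  64-bit vecD)
  std::printf("%d %d %d\n", b8[7], h8[7], s2[1]);
  return 0;
}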
13534 
13535 // ====================REDUCTION ARITHMETIC====================================
13536 
13537 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
13538 %{
13539   match(Set dst (AddReductionVI src1 src2));
13540   ins_cost(INSN_COST);
13541   effect(TEMP tmp, TEMP tmp2);
13542   format %{ "umov  $tmp, $src2, S, 0\n\t"
13543             "umov  $tmp2, $src2, S, 1\n\t"
13544             "addw  $dst, $src1, $tmp\n\t"
13545             "addw  $dst, $dst, $tmp2\t add reduction2i"
13546   %}
13547   ins_encode %{
13548     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
13549     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
13550     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
13551     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
13552   %}
13553   ins_pipe(pipe_class_default);
13554 %}
13555 
13556 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13557 %{
13558   match(Set dst (AddReductionVI src1 src2));
13559   ins_cost(INSN_COST);
13560   effect(TEMP tmp, TEMP tmp2);
13561   format %{ "addv  $tmp, T4S, $src2\n\t"
13562             "umov  $tmp2, $tmp, S, 0\n\t"
13563             "addw  $dst, $tmp2, $src1\t add reduction4i"
13564   %}
13565   ins_encode %{
13566     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
13567             as_FloatRegister($src2$$reg));
13568     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13569     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
13570   %}
13571   ins_pipe(pipe_class_default);
13572 %}
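
Both integer add-reductions compute the same value: the incoming scalar src1 plus the sum of the vector lanes; the 4-lane form merely folds the lanes with a single addv before moving the result into a general register. A scalar C++ model (editorial sketch, function name invented):

#include <cstdint>
#include <cstdio>

// Scalar equivalent of reduce_add2I / reduce_add4I above.
static int32_t add_reduction(int32_t src1, const int32_t* lanes, int n) {
  int32_t acc = src1;
  for (int i = 0; i < n; i++) acc += lanes[i];   // same result the rules compute
  return acc;
}

int main() {
  int32_t v2[2] = {3, 4};
  int32_t v4[4] = {1, 2, 3, 4};
  std::printf("%d %d\n",
              add_reduction(10, v2, 2),          // 17
              add_reduction(10, v4, 4));         // 20
  return 0;
}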
13573 
13574 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
13575 %{
13576   match(Set dst (MulReductionVI src1 src2));
13577   ins_cost(INSN_COST);
13578   effect(TEMP tmp, TEMP dst);
13579   format %{ "umov  $tmp, $src2, S, 0\n\t"
13580             "mul   $dst, $tmp, $src1\n\t"
13581             "umov  $tmp, $src2, S, 1\n\t"
13582             "mul   $dst, $tmp, $dst\t mul reduction2i"
13583   %}
13584   ins_encode %{
13585     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
13586     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
13587     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
13588     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
13589   %}
13590   ins_pipe(pipe_class_default);
13591 %}
13592 
13593 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13594 %{
13595   match(Set dst (MulReductionVI src1 src2));
13596   ins_cost(INSN_COST);
13597   effect(TEMP tmp, TEMP tmp2, TEMP dst);
13598   format %{ "ins   $tmp, D, $src2, 0, 1\n\t"
13599             "mulv  $tmp, $tmp, $src2\n\t"
13600             "umov  $tmp2, $tmp, S, 0\n\t"
13601             "mul   $dst, $tmp2, $src1\n\t"
13602             "umov  $tmp2, $tmp, S, 1\n\t"
13603             "mul   $dst, $tmp2, $dst\t mul reduction4i"
13604   %}
13605   ins_encode %{
13606     __ ins(as_FloatRegister($tmp$$reg), __ D,
13607            as_FloatRegister($src2$$reg), 0, 1);
13608     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
13609            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
13610     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13611     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
13612     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
13613     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
13614   %}
13615   ins_pipe(pipe_class_default);
13616 %}
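
The 4-lane multiply reduction uses a halving trick: ins copies lanes 2 and 3 over lanes 0 and 1 of the temporary, mulv multiplies the two halves lane by lane, and the final two scalar muls fold in src1. A scalar C++ sketch of that arithmetic (editorial illustration, names invented):

#include <cstdint>
#include <cstdio>

// Scalar equivalent of reduce_mul4I above.
static int32_t mul_reduction4(int32_t src1, const int32_t v[4]) {
  int32_t tmp[2] = { v[0] * v[2], v[1] * v[3] };  // ins + mulv over the two halves
  int32_t dst = tmp[0] * src1;                    // umov lane 0, mul
  dst = tmp[1] * dst;                             // umov lane 1, mul
  return dst;                                     // == src1 * v0*v1*v2*v3
}

int main() {
  int32_t v[4] = {2, 3, 4, 5};
  std::printf("%d\n", mul_reduction4(7, v));      // 7*2*3*4*5 = 840
  return 0;
}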
13617 
13618 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
13619 %{
13620   match(Set dst (AddReductionVF src1 src2));
13621   ins_cost(INSN_COST);
13622   effect(TEMP tmp, TEMP dst);
13623   format %{ "fadds $dst, $src1, $src2\n\t"
13624             "ins   $tmp, S, $src2, 0, 1\n\t"
13625             "fadds $dst, $dst, $tmp\t add reduction2f"
13626   %}
13627   ins_encode %{
13628     __ fadds(as_FloatRegister($dst$$reg),
13629              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13630     __ ins(as_FloatRegister($tmp$$reg), __ S,
13631            as_FloatRegister($src2$$reg), 0, 1);
13632     __ fadds(as_FloatRegister($dst$$reg),
13633              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13634   %}
13635   ins_pipe(pipe_class_default);
13636 %}
13637 
13638 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13639 %{
13640   match(Set dst (AddReductionVF src1 src2));
13641   ins_cost(INSN_COST);
13642   effect(TEMP tmp, TEMP dst);
13643   format %{ "fadds $dst, $src1, $src2\n\t"
13644             "ins   $tmp, S, $src2, 0, 1\n\t"
13645             "fadds $dst, $dst, $tmp\n\t"
13646             "ins   $tmp, S, $src2, 0, 2\n\t"
13647             "fadds $dst, $dst, $tmp\n\t"
13648             "ins   $tmp, S, $src2, 0, 3\n\t"
13649             "fadds $dst, $dst, $tmp\t add reduction4f"
13650   %}
13651   ins_encode %{
13652     __ fadds(as_FloatRegister($dst$$reg),
13653              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13654     __ ins(as_FloatRegister($tmp$$reg), __ S,
13655            as_FloatRegister($src2$$reg), 0, 1);
13656     __ fadds(as_FloatRegister($dst$$reg),
13657              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13658     __ ins(as_FloatRegister($tmp$$reg), __ S,
13659            as_FloatRegister($src2$$reg), 0, 2);
13660     __ fadds(as_FloatRegister($dst$$reg),
13661              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13662     __ ins(as_FloatRegister($tmp$$reg), __ S,
13663            as_FloatRegister($src2$$reg), 0, 3);
13664     __ fadds(as_FloatRegister($dst$$reg),
13665              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13666   %}
13667   ins_pipe(pipe_class_default);
13668 %}
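
A note on shape: these float reductions fold the lanes one at a time with scalar fadds in lane order rather than using a single tree-shaped SIMD reduction, which keeps the result identical to the scalar loop the reduction replaces; floating-point addition is not associative, so the folding order matters. A tiny editorial C++ demonstration of that difference:

#include <cstdio>

int main() {
  float lanes[4] = {1e8f, 1.0f, -1e8f, 1.0f};
  float ordered = 0.0f;
  for (int i = 0; i < 4; i++) ordered += lanes[i];                 // lane-order chain
  float pairwise = (lanes[0] + lanes[2]) + (lanes[1] + lanes[3]);  // tree-shaped fold
  std::printf("ordered=%g pairwise=%g\n", ordered, pairwise);      // 1 vs 2
  return 0;
}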
13669 
13670 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
13671 %{
13672   match(Set dst (MulReductionVF src1 src2));
13673   ins_cost(INSN_COST);
13674   effect(TEMP tmp, TEMP dst);
13675   format %{ "fmuls $dst, $src1, $src2\n\t"
13676             "ins   $tmp, S, $src2, 0, 1\n\t"
13677             "fmuls $dst, $dst, $tmp\t mul reduction2f"
13678   %}
13679   ins_encode %{
13680     __ fmuls(as_FloatRegister($dst$$reg),
13681              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13682     __ ins(as_FloatRegister($tmp$$reg), __ S,
13683            as_FloatRegister($src2$$reg), 0, 1);
13684     __ fmuls(as_FloatRegister($dst$$reg),
13685              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13686   %}
13687   ins_pipe(pipe_class_default);
13688 %}
13689 
13690 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13691 %{
13692   match(Set dst (MulReductionVF src1 src2));
13693   ins_cost(INSN_COST);
13694   effect(TEMP tmp, TEMP dst);
13695   format %{ "fmuls $dst, $src1, $src2\n\t"
13696             "ins   $tmp, S, $src2, 0, 1\n\t"
13697             "fmuls $dst, $dst, $tmp\n\t"
13698             "ins   $tmp, S, $src2, 0, 2\n\t"
13699             "fmuls $dst, $dst, $tmp\n\t"
13700             "ins   $tmp, S, $src2, 0, 3\n\t"
13701             "fmuls $dst, $dst, $tmp\t mul reduction4f"
13702   %}
13703   ins_encode %{
13704     __ fmuls(as_FloatRegister($dst$$reg),
13705              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13706     __ ins(as_FloatRegister($tmp$$reg), __ S,
13707            as_FloatRegister($src2$$reg), 0, 1);
13708     __ fmuls(as_FloatRegister($dst$$reg),
13709              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));


13746   effect(TEMP tmp, TEMP dst);
13747   format %{ "fmuld $dst, $src1, $src2\n\t"
13748             "ins   $tmp, D, $src2, 0, 1\n\t"
13749             "fmuld $dst, $dst, $tmp\t mul reduction2d"
13750   %}
13751   ins_encode %{
13752     __ fmuld(as_FloatRegister($dst$$reg),
13753              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13754     __ ins(as_FloatRegister($tmp$$reg), __ D,
13755            as_FloatRegister($src2$$reg), 0, 1);
13756     __ fmuld(as_FloatRegister($dst$$reg),
13757              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13758   %}
13759   ins_pipe(pipe_class_default);
13760 %}
13761 
13762 // ====================VECTOR ARITHMETIC=======================================
13763 
13764 // --------------------------------- ADD --------------------------------------
13765 
13766 instruct vadd8B(vecD dst, vecD src1, vecD src2)
13767 %{
13768   predicate(n->as_Vector()->length() == 4 ||
13769             n->as_Vector()->length() == 8);
13770   match(Set dst (AddVB src1 src2));
13771   ins_cost(INSN_COST);
13772   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
13773   ins_encode %{
13774     __ addv(as_FloatRegister($dst$$reg), __ T8B,
13775             as_FloatRegister($src1$$reg),
13776             as_FloatRegister($src2$$reg));
13777   %}
13778   ins_pipe(pipe_class_default);
13779 %}
13780 
13781 instruct vadd16B(vecX dst, vecX src1, vecX src2)
13782 %{
13783   predicate(n->as_Vector()->length() == 16);
13784   match(Set dst (AddVB src1 src2));
13785   ins_cost(INSN_COST);
13786   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
13787   ins_encode %{
13788     __ addv(as_FloatRegister($dst$$reg), __ T16B,
13789             as_FloatRegister($src1$$reg),
13790             as_FloatRegister($src2$$reg));
13791   %}
13792   ins_pipe(pipe_class_default);
13793 %}
13794 
13795 instruct vadd4S(vecD dst, vecD src1, vecD src2)
13796 %{
13797   predicate(n->as_Vector()->length() == 2 ||
13798             n->as_Vector()->length() == 4);
13799   match(Set dst (AddVS src1 src2));
13800   ins_cost(INSN_COST);
13801   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
13802   ins_encode %{
13803     __ addv(as_FloatRegister($dst$$reg), __ T4H,
13804             as_FloatRegister($src1$$reg),
13805             as_FloatRegister($src2$$reg));
13806   %}
13807   ins_pipe(pipe_class_default);
13808 %}
13809 
13810 instruct vadd8S(vecX dst, vecX src1, vecX src2)
13811 %{
13812   predicate(n->as_Vector()->length() == 8);
13813   match(Set dst (AddVS src1 src2));
13814   ins_cost(INSN_COST);
13815   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
13816   ins_encode %{
13817     __ addv(as_FloatRegister($dst$$reg), __ T8H,
13818             as_FloatRegister($src1$$reg),
13819             as_FloatRegister($src2$$reg));
13820   %}
13821   ins_pipe(pipe_class_default);
13822 %}
13823 
13824 instruct vadd2I(vecD dst, vecD src1, vecD src2)
13825 %{
13826   predicate(n->as_Vector()->length() == 2);
13827   match(Set dst (AddVI src1 src2));
13828   ins_cost(INSN_COST);
13829   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
13830   ins_encode %{
13831     __ addv(as_FloatRegister($dst$$reg), __ T2S,
13832             as_FloatRegister($src1$$reg),
13833             as_FloatRegister($src2$$reg));
13834   %}
13835   ins_pipe(pipe_class_default);
13836 %}
13837 
13838 instruct vadd4I(vecX dst, vecX src1, vecX src2)
13839 %{
13840   predicate(n->as_Vector()->length() == 4);
13841   match(Set dst (AddVI src1 src2));
13842   ins_cost(INSN_COST);
13843   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
13844   ins_encode %{
13845     __ addv(as_FloatRegister($dst$$reg), __ T4S,
13846             as_FloatRegister($src1$$reg),
13847             as_FloatRegister($src2$$reg));
13848   %}
13849   ins_pipe(pipe_class_default);
13850 %}
13851 
13852 instruct vadd2L(vecX dst, vecX src1, vecX src2)
13853 %{
13854   predicate(n->as_Vector()->length() == 2);
13855   match(Set dst (AddVL src1 src2));
13856   ins_cost(INSN_COST);
13857   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
13858   ins_encode %{
13859     __ addv(as_FloatRegister($dst$$reg), __ T2D,
13860             as_FloatRegister($src1$$reg),
13861             as_FloatRegister($src2$$reg));
13862   %}
13863   ins_pipe(pipe_class_default);
13864 %}
13865 
13866 instruct vadd2F(vecD dst, vecD src1, vecD src2)
13867 %{
13868   predicate(n->as_Vector()->length() == 2);
13869   match(Set dst (AddVF src1 src2));
13870   ins_cost(INSN_COST);
13871   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
13872   ins_encode %{
13873     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
13874             as_FloatRegister($src1$$reg),
13875             as_FloatRegister($src2$$reg));
13876   %}
13877   ins_pipe(pipe_class_default);
13878 %}
13879 
13880 instruct vadd4F(vecX dst, vecX src1, vecX src2)
13881 %{
13882   predicate(n->as_Vector()->length() == 4);
13883   match(Set dst (AddVF src1 src2));
13884   ins_cost(INSN_COST);
13885   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
13886   ins_encode %{
13887     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
13888             as_FloatRegister($src1$$reg),
13889             as_FloatRegister($src2$$reg));
13890   %}
13891   ins_pipe(pipe_class_default);
13892 %}
13893 
13894 instruct vadd2D(vecX dst, vecX src1, vecX src2)
13895 %{
13896   match(Set dst (AddVD src1 src2));
13897   ins_cost(INSN_COST);
13898   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
13899   ins_encode %{
13900     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
13901             as_FloatRegister($src1$$reg),
13902             as_FloatRegister($src2$$reg));
13903   %}
13904   ins_pipe(pipe_class_default);
13905 %}
13906 
13907 // --------------------------------- SUB --------------------------------------
13908 
13909 instruct vsub8B(vecD dst, vecD src1, vecD src2)
13910 %{
13911   predicate(n->as_Vector()->length() == 4 ||
13912             n->as_Vector()->length() == 8);
13913   match(Set dst (SubVB src1 src2));
13914   ins_cost(INSN_COST);
13915   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
13916   ins_encode %{
13917     __ subv(as_FloatRegister($dst$$reg), __ T8B,
13918             as_FloatRegister($src1$$reg),
13919             as_FloatRegister($src2$$reg));
13920   %}
13921   ins_pipe(pipe_class_default);
13922 %}
13923 
13924 instruct vsub16B(vecX dst, vecX src1, vecX src2)
13925 %{
13926   predicate(n->as_Vector()->length() == 16);
13927   match(Set dst (SubVB src1 src2));
13928   ins_cost(INSN_COST);
13929   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
13930   ins_encode %{
13931     __ subv(as_FloatRegister($dst$$reg), __ T16B,
13932             as_FloatRegister($src1$$reg),
13933             as_FloatRegister($src2$$reg));
13934   %}
13935   ins_pipe(pipe_class_default);
13936 %}
13937 
13938 instruct vsub4S(vecD dst, vecD src1, vecD src2)
13939 %{
13940   predicate(n->as_Vector()->length() == 2 ||
13941             n->as_Vector()->length() == 4);
13942   match(Set dst (SubVS src1 src2));
13943   ins_cost(INSN_COST);
13944   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
13945   ins_encode %{
13946     __ subv(as_FloatRegister($dst$$reg), __ T4H,
13947             as_FloatRegister($src1$$reg),
13948             as_FloatRegister($src2$$reg));
13949   %}
13950   ins_pipe(pipe_class_default);
13951 %}
13952 
13953 instruct vsub8S(vecX dst, vecX src1, vecX src2)
13954 %{
13955   predicate(n->as_Vector()->length() == 8);
13956   match(Set dst (SubVS src1 src2));
13957   ins_cost(INSN_COST);
13958   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
13959   ins_encode %{
13960     __ subv(as_FloatRegister($dst$$reg), __ T8H,
13961             as_FloatRegister($src1$$reg),
13962             as_FloatRegister($src2$$reg));
13963   %}
13964   ins_pipe(pipe_class_default);
13965 %}
13966 
13967 instruct vsub2I(vecD dst, vecD src1, vecD src2)
13968 %{
13969   predicate(n->as_Vector()->length() == 2);
13970   match(Set dst (SubVI src1 src2));
13971   ins_cost(INSN_COST);
13972   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
13973   ins_encode %{
13974     __ subv(as_FloatRegister($dst$$reg), __ T2S,
13975             as_FloatRegister($src1$$reg),
13976             as_FloatRegister($src2$$reg));
13977   %}
13978   ins_pipe(pipe_class_default);
13979 %}
13980 
13981 instruct vsub4I(vecX dst, vecX src1, vecX src2)
13982 %{
13983   predicate(n->as_Vector()->length() == 4);
13984   match(Set dst (SubVI src1 src2));
13985   ins_cost(INSN_COST);
13986   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
13987   ins_encode %{
13988     __ subv(as_FloatRegister($dst$$reg), __ T4S,
13989             as_FloatRegister($src1$$reg),
13990             as_FloatRegister($src2$$reg));
13991   %}
13992   ins_pipe(pipe_class_default);
13993 %}
13994 
13995 instruct vsub2L(vecX dst, vecX src1, vecX src2)
13996 %{
13997   predicate(n->as_Vector()->length() == 2);
13998   match(Set dst (SubVL src1 src2));
13999   ins_cost(INSN_COST);
14000   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
14001   ins_encode %{
14002     __ subv(as_FloatRegister($dst$$reg), __ T2D,
14003             as_FloatRegister($src1$$reg),
14004             as_FloatRegister($src2$$reg));
14005   %}
14006   ins_pipe(pipe_class_default);
14007 %}
14008 
14009 instruct vsub2F(vecD dst, vecD src1, vecD src2)
14010 %{
14011   predicate(n->as_Vector()->length() == 2);
14012   match(Set dst (SubVF src1 src2));
14013   ins_cost(INSN_COST);
14014   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
14015   ins_encode %{
14016     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
14017             as_FloatRegister($src1$$reg),
14018             as_FloatRegister($src2$$reg));
14019   %}
14020   ins_pipe(pipe_class_default);
14021 %}
14022 
14023 instruct vsub4F(vecX dst, vecX src1, vecX src2)
14024 %{
14025   predicate(n->as_Vector()->length() == 4);
14026   match(Set dst (SubVF src1 src2));
14027   ins_cost(INSN_COST);
14028   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
14029   ins_encode %{
14030     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
14031             as_FloatRegister($src1$$reg),
14032             as_FloatRegister($src2$$reg));
14033   %}
14034   ins_pipe(pipe_class_default);
14035 %}
14036 
14037 instruct vsub2D(vecX dst, vecX src1, vecX src2)
14038 %{
14039   predicate(n->as_Vector()->length() == 2);
14040   match(Set dst (SubVD src1 src2));
14041   ins_cost(INSN_COST);
14042   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
14043   ins_encode %{
14044     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
14045             as_FloatRegister($src1$$reg),
14046             as_FloatRegister($src2$$reg));
14047   %}
14048   ins_pipe(pipe_class_default);
14049 %}
14050 
14051 // --------------------------------- MUL --------------------------------------
14052 
14053 instruct vmul4S(vecD dst, vecD src1, vecD src2)
14054 %{
14055   predicate(n->as_Vector()->length() == 2 ||
14056             n->as_Vector()->length() == 4);
14057   match(Set dst (MulVS src1 src2));
14058   ins_cost(INSN_COST);
14059   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
14060   ins_encode %{
14061     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
14062             as_FloatRegister($src1$$reg),
14063             as_FloatRegister($src2$$reg));
14064   %}
14065   ins_pipe(pipe_class_default);
14066 %}
14067 
14068 instruct vmul8S(vecX dst, vecX src1, vecX src2)
14069 %{
14070   predicate(n->as_Vector()->length() == 8);
14071   match(Set dst (MulVS src1 src2));
14072   ins_cost(INSN_COST);
14073   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
14074   ins_encode %{
14075     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
14076             as_FloatRegister($src1$$reg),
14077             as_FloatRegister($src2$$reg));
14078   %}
14079   ins_pipe(pipe_class_default);
14080 %}
14081 
14082 instruct vmul2I(vecD dst, vecD src1, vecD src2)
14083 %{
14084   predicate(n->as_Vector()->length() == 2);
14085   match(Set dst (MulVI src1 src2));
14086   ins_cost(INSN_COST);
14087   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
14088   ins_encode %{
14089     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
14090             as_FloatRegister($src1$$reg),
14091             as_FloatRegister($src2$$reg));
14092   %}
14093   ins_pipe(pipe_class_default);
14094 %}
14095 
14096 instruct vmul4I(vecX dst, vecX src1, vecX src2)
14097 %{
14098   predicate(n->as_Vector()->length() == 4);
14099   match(Set dst (MulVI src1 src2));
14100   ins_cost(INSN_COST);
14101   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
14102   ins_encode %{
14103     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
14104             as_FloatRegister($src1$$reg),
14105             as_FloatRegister($src2$$reg));
14106   %}
14107   ins_pipe(pipe_class_default);
14108 %}
14109 
14110 instruct vmul2F(vecD dst, vecD src1, vecD src2)
14111 %{
14112   predicate(n->as_Vector()->length() == 2);
14113   match(Set dst (MulVF src1 src2));
14114   ins_cost(INSN_COST);
14115   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
14116   ins_encode %{
14117     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
14118             as_FloatRegister($src1$$reg),
14119             as_FloatRegister($src2$$reg));
14120   %}
14121   ins_pipe(pipe_class_default);
14122 %}
14123 
14124 instruct vmul4F(vecX dst, vecX src1, vecX src2)
14125 %{
14126   predicate(n->as_Vector()->length() == 4);
14127   match(Set dst (MulVF src1 src2));
14128   ins_cost(INSN_COST);
14129   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
14130   ins_encode %{
14131     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
14132             as_FloatRegister($src1$$reg),
14133             as_FloatRegister($src2$$reg));
14134   %}
14135   ins_pipe(pipe_class_default);
14136 %}
14137 
14138 instruct vmul2D(vecX dst, vecX src1, vecX src2)
14139 %{
14140   predicate(n->as_Vector()->length() == 2);
14141   match(Set dst (MulVD src1 src2));
14142   ins_cost(INSN_COST);
14143   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
14144   ins_encode %{
14145     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
14146             as_FloatRegister($src1$$reg),
14147             as_FloatRegister($src2$$reg));
14148   %}
14149   ins_pipe(pipe_class_default);
14150 %}
14151 
14152 // --------------------------------- DIV --------------------------------------
14153 
14154 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
14155 %{
14156   predicate(n->as_Vector()->length() == 2);
14157   match(Set dst (DivVF src1 src2));
14158   ins_cost(INSN_COST);
14159   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
14160   ins_encode %{
14161     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
14162             as_FloatRegister($src1$$reg),
14163             as_FloatRegister($src2$$reg));
14164   %}
14165   ins_pipe(pipe_class_default);
14166 %}
14167 
14168 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
14169 %{
14170   predicate(n->as_Vector()->length() == 4);
14171   match(Set dst (DivVF src1 src2));
14172   ins_cost(INSN_COST);
14173   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
14174   ins_encode %{
14175     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
14176             as_FloatRegister($src1$$reg),
14177             as_FloatRegister($src2$$reg));
14178   %}
14179   ins_pipe(pipe_class_default);
14180 %}
14181 
14182 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
14183 %{
14184   predicate(n->as_Vector()->length() == 2);
14185   match(Set dst (DivVD src1 src2));
14186   ins_cost(INSN_COST);
14187   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
14188   ins_encode %{
14189     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
14190             as_FloatRegister($src1$$reg),
14191             as_FloatRegister($src2$$reg));
14192   %}
14193   ins_pipe(pipe_class_default);
14194 %}
14195 
14196 // --------------------------------- AND --------------------------------------
14197 
14198 instruct vand8B(vecD dst, vecD src1, vecD src2)
14199 %{
14200   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14201             n->as_Vector()->length_in_bytes() == 8);
14202   match(Set dst (AndV src1 src2));
14203   ins_cost(INSN_COST);
14204   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
14205   ins_encode %{
14206     __ andr(as_FloatRegister($dst$$reg), __ T8B,
14207             as_FloatRegister($src1$$reg),
14208             as_FloatRegister($src2$$reg));
14209   %}
14210   ins_pipe(pipe_class_default);
14211 %}
14212 
14213 instruct vand16B(vecX dst, vecX src1, vecX src2)
14214 %{
14215   predicate(n->as_Vector()->length_in_bytes() == 16);
14216   match(Set dst (AndV src1 src2));
14217   ins_cost(INSN_COST);
14218   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
14219   ins_encode %{
14220     __ andr(as_FloatRegister($dst$$reg), __ T16B,
14221             as_FloatRegister($src1$$reg),
14222             as_FloatRegister($src2$$reg));
14223   %}
14224   ins_pipe(pipe_class_default);
14225 %}
14226 
14227 // --------------------------------- OR ---------------------------------------
14228 
14229 instruct vor8B(vecD dst, vecD src1, vecD src2)
14230 %{
14231   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14232             n->as_Vector()->length_in_bytes() == 8);
14233   match(Set dst (OrV src1 src2));
14234   ins_cost(INSN_COST);
14235   format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
14236   ins_encode %{
14237     __ orr(as_FloatRegister($dst$$reg), __ T8B,
14238             as_FloatRegister($src1$$reg),
14239             as_FloatRegister($src2$$reg));
14240   %}
14241   ins_pipe(pipe_class_default);
14242 %}
14243 
14244 instruct vor16B(vecX dst, vecX src1, vecX src2)
14245 %{
14246   predicate(n->as_Vector()->length_in_bytes() == 16);
14247   match(Set dst (OrV src1 src2));
14248   ins_cost(INSN_COST);
14249   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
14250   ins_encode %{
14251     __ orr(as_FloatRegister($dst$$reg), __ T16B,
14252             as_FloatRegister($src1$$reg),
14253             as_FloatRegister($src2$$reg));
14254   %}
14255   ins_pipe(pipe_class_default);
14256 %}
14257 
14258 // --------------------------------- XOR --------------------------------------
14259 
14260 instruct vxor8B(vecD dst, vecD src1, vecD src2)
14261 %{
14262   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14263             n->as_Vector()->length_in_bytes() == 8);
14264   match(Set dst (XorV src1 src2));
14265   ins_cost(INSN_COST);
14266   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
14267   ins_encode %{
14268     __ eor(as_FloatRegister($dst$$reg), __ T8B,
14269             as_FloatRegister($src1$$reg),
14270             as_FloatRegister($src2$$reg));
14271   %}
14272   ins_pipe(pipe_class_default);
14273 %}
14274 
14275 instruct vxor16B(vecX dst, vecX src1, vecX src2)
14276 %{
14277   predicate(n->as_Vector()->length_in_bytes() == 16);
14278   match(Set dst (XorV src1 src2));
14279   ins_cost(INSN_COST);
14280   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
14281   ins_encode %{
14282     __ eor(as_FloatRegister($dst$$reg), __ T16B,
14283             as_FloatRegister($src1$$reg),
14284             as_FloatRegister($src2$$reg));
14285   %}
14286   ins_pipe(pipe_class_default);
14287 %}
14288 
14289 // ------------------------------ Shift ---------------------------------------
14290 
14291 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
14292   match(Set dst (LShiftCntV cnt));
14293   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
14294   ins_encode %{
14295     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
14296   %}
14297   ins_pipe(pipe_class_default);
14298 %}
14299 
14300 // Right shifts on aarch64 SIMD are implemented as left shifts by a negative shift amount
14301 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
14302   match(Set dst (RShiftCntV cnt));
14303   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
14304   ins_encode %{
14305     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
14306     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
14307   %}
14308   ins_pipe(pipe_class_default);
14309 %}
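
The point of negating the duplicated count is that SSHL/USHL shift left for a positive per-lane count and right for a negative one, so the variable-shift rules below can use a single sshl/ushl regardless of direction. A per-lane C++ model, as an editorial sketch (function names invented; the casts merely stand in for the lane width, and the arithmetic right shift of a negative value relies on the usual two's-complement behaviour):

#include <cstdint>
#include <cstdio>

static int32_t sshl_lane(int32_t x, int8_t cnt) {    // signed variant (sshl)
  if (cnt >= 0) return (int32_t)((uint32_t)x << cnt);
  return x >> -cnt;                                  // arithmetic right shift
}
static uint32_t ushl_lane(uint32_t x, int8_t cnt) {  // unsigned variant (ushl)
  if (cnt >= 0) return x << cnt;
  return x >> -cnt;                                  // logical right shift
}

int main() {
  int8_t left = 3, right = -3;                       // vshiftcntR = neg(dup(3))
  std::printf("%d %d %u\n",
              sshl_lane(-64, left),                  // -512 (left shift)
              sshl_lane(-64, right),                 // -8   (arithmetic right shift)
              ushl_lane(0x80000000u, right));        // 0x10000000 (logical right shift)
  return 0;
}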
14310 
14311 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
14312   predicate(n->as_Vector()->length() == 4 ||
14313             n->as_Vector()->length() == 8);
14314   match(Set dst (LShiftVB src shift));
14315   match(Set dst (RShiftVB src shift));
14316   ins_cost(INSN_COST);
14317   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
14318   ins_encode %{
14319     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
14320             as_FloatRegister($src$$reg),
14321             as_FloatRegister($shift$$reg));
14322   %}
14323   ins_pipe(pipe_class_default);
14324 %}
14325 
14326 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
14327   predicate(n->as_Vector()->length() == 16);
14328   match(Set dst (LShiftVB src shift));
14329   match(Set dst (RShiftVB src shift));
14330   ins_cost(INSN_COST);
14331   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
14332   ins_encode %{
14333     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
14334             as_FloatRegister($src$$reg),
14335             as_FloatRegister($shift$$reg));
14336   %}
14337   ins_pipe(pipe_class_default);
14338 %}
14339 
14340 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
14341   predicate(n->as_Vector()->length() == 4 ||
14342             n->as_Vector()->length() == 8);
14343   match(Set dst (URShiftVB src shift));
14344   ins_cost(INSN_COST);
14345   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
14346   ins_encode %{
14347     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
14348             as_FloatRegister($src$$reg),
14349             as_FloatRegister($shift$$reg));
14350   %}
14351   ins_pipe(pipe_class_default);
14352 %}
14353 
14354 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
14355   predicate(n->as_Vector()->length() == 16);
14356   match(Set dst (URShiftVB src shift));
14357   ins_cost(INSN_COST);
14358   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
14359   ins_encode %{
14360     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
14361             as_FloatRegister($src$$reg),
14362             as_FloatRegister($shift$$reg));
14363   %}
14364   ins_pipe(pipe_class_default);
14365 %}
14366 
14367 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
14368   predicate(n->as_Vector()->length() == 4 ||
14369             n->as_Vector()->length() == 8);
14370   match(Set dst (LShiftVB src shift));
14371   ins_cost(INSN_COST);
14372   format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
14373   ins_encode %{
14374     int sh = (int)$shift$$constant & 31;
14375     if (sh >= 8) {
14376       __ eor(as_FloatRegister($dst$$reg), __ T8B,
14377              as_FloatRegister($src$$reg),
14378              as_FloatRegister($src$$reg));
14379     } else {
14380       __ shl(as_FloatRegister($dst$$reg), __ T8B,
14381              as_FloatRegister($src$$reg), sh);
14382     }
14383   %}
14384   ins_pipe(pipe_class_default);
14385 %}
14386 
14387 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
14388   predicate(n->as_Vector()->length() == 16);
14389   match(Set dst (LShiftVB src shift));
14390   ins_cost(INSN_COST);
14391   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
14392   ins_encode %{
14393     int sh = (int)$shift$$constant & 31;
14394     if (sh >= 8) {
14395       __ eor(as_FloatRegister($dst$$reg), __ T16B,
14396              as_FloatRegister($src$$reg),
14397              as_FloatRegister($src$$reg));
14398     } else {
14399       __ shl(as_FloatRegister($dst$$reg), __ T16B,
14400              as_FloatRegister($src$$reg), sh);
14401     }
14402   %}
14403   ins_pipe(pipe_class_default);
14404 %}
14405 
14406 instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
14407   predicate(n->as_Vector()->length() == 4 ||
14408             n->as_Vector()->length() == 8);
14409   match(Set dst (RShiftVB src shift));
14410   ins_cost(INSN_COST);
14411   format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
14412   ins_encode %{
14413     int sh = (int)$shift$$constant & 31;
14414     if (sh >= 8) sh = 7;
14415     sh = -sh & 7;
14416     __ sshr(as_FloatRegister($dst$$reg), __ T8B,
14417            as_FloatRegister($src$$reg), sh);
14418   %}
14419   ins_pipe(pipe_class_default);
14420 %}
14421 
14422 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
14423   predicate(n->as_Vector()->length() == 16);
14424   match(Set dst (RShiftVB src shift));
14425   ins_cost(INSN_COST);
14426   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
14427   ins_encode %{
14428     int sh = (int)$shift$$constant & 31;
14429     if (sh >= 8) sh = 7;
14430     sh = -sh & 7;
14431     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
14432            as_FloatRegister($src$$reg), sh);
14433   %}
14434   ins_pipe(pipe_class_default);
14435 %}
14436 
14437 instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
14438   predicate(n->as_Vector()->length() == 4 ||
14439             n->as_Vector()->length() == 8);
14440   match(Set dst (URShiftVB src shift));
14441   ins_cost(INSN_COST);
14442   format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
14443   ins_encode %{
14444     int sh = (int)$shift$$constant & 31;
14445     if (sh >= 8) {
14446       __ eor(as_FloatRegister($dst$$reg), __ T8B,
14447              as_FloatRegister($src$$reg),
14448              as_FloatRegister($src$$reg));
14449     } else {
14450       __ ushr(as_FloatRegister($dst$$reg), __ T8B,
14451              as_FloatRegister($src$$reg), -sh & 7);
14452     }
14453   %}
14454   ins_pipe(pipe_class_default);
14455 %}
14456 
14457 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
14458   predicate(n->as_Vector()->length() == 16);
14459   match(Set dst (URShiftVB src shift));
14460   ins_cost(INSN_COST);
14461   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
14462   ins_encode %{
14463     int sh = (int)$shift$$constant & 31;
14464     if (sh >= 8) {
14465       __ eor(as_FloatRegister($dst$$reg), __ T16B,
14466              as_FloatRegister($src$$reg),
14467              as_FloatRegister($src$$reg));
14468     } else {
14469       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
14470              as_FloatRegister($src$$reg), -sh & 7);
14471     }
14472   %}
14473   ins_pipe(pipe_class_default);
14474 %}
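
The immediate byte shifts above all clamp the count the same way: the constant is masked to 0..31, and because a byte lane is only 8 bits wide a count of 8 or more must zero the lanes for shl and ushr (hence the eor), while sshr saturates the count at 7 so each lane is filled with its sign bit. A per-lane C++ model of the three cases, as an editorial sketch with invented function names:

#include <cstdint>
#include <cstdio>

static int shl_b (int8_t x, int sh)  { sh &= 31; return sh >= 8 ? 0 : (int8_t)((uint8_t)x << sh); }
static int ushr_b(uint8_t x, int sh) { sh &= 31; return sh >= 8 ? 0 : (uint8_t)(x >> sh); }
static int sshr_b(int8_t x, int sh)  { sh &= 31; if (sh >= 8) sh = 7; return (int8_t)(x >> sh); }

int main() {
  std::printf("%d %d %d\n",
              shl_b (-1, 9),      // 0: counts >= 8 zero the lanes (the eor path)
              ushr_b(0xff, 9),    // 0: same for the logical right shift
              sshr_b(-128, 9));   // -1: sshr clamps to 7, so the sign bit fills the lane
  return 0;
}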
14475 
14476 instruct vsll4S(vecD dst, vecD src, vecX shift) %{
14477   predicate(n->as_Vector()->length() == 2 ||
14478             n->as_Vector()->length() == 4);
14479   match(Set dst (LShiftVS src shift));
14480   match(Set dst (RShiftVS src shift));
14481   ins_cost(INSN_COST);
14482   format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
14483   ins_encode %{
14484     __ sshl(as_FloatRegister($dst$$reg), __ T4H,
14485             as_FloatRegister($src$$reg),
14486             as_FloatRegister($shift$$reg));
14487   %}
14488   ins_pipe(pipe_class_default);
14489 %}
14490 
14491 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
14492   predicate(n->as_Vector()->length() == 8);
14493   match(Set dst (LShiftVS src shift));
14494   match(Set dst (RShiftVS src shift));
14495   ins_cost(INSN_COST);
14496   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
14497   ins_encode %{
14498     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
14499             as_FloatRegister($src$$reg),
14500             as_FloatRegister($shift$$reg));
14501   %}
14502   ins_pipe(pipe_class_default);
14503 %}
14504 
14505 instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
14506   predicate(n->as_Vector()->length() == 2 ||
14507             n->as_Vector()->length() == 4);
14508   match(Set dst (URShiftVS src shift));
14509   ins_cost(INSN_COST);
14510   format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
14511   ins_encode %{
14512     __ ushl(as_FloatRegister($dst$$reg), __ T4H,
14513             as_FloatRegister($src$$reg),
14514             as_FloatRegister($shift$$reg));
14515   %}
14516   ins_pipe(pipe_class_default);
14517 %}
14518 
14519 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
14520   predicate(n->as_Vector()->length() == 8);
14521   match(Set dst (URShiftVS src shift));
14522   ins_cost(INSN_COST);
14523   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
14524   ins_encode %{
14525     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
14526             as_FloatRegister($src$$reg),
14527             as_FloatRegister($shift$$reg));
14528   %}
14529   ins_pipe(pipe_class_default);
14530 %}
14531 
14532 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
14533   predicate(n->as_Vector()->length() == 2 ||
14534             n->as_Vector()->length() == 4);
14535   match(Set dst (LShiftVS src shift));
14536   ins_cost(INSN_COST);
14537   format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
14538   ins_encode %{
14539     int sh = (int)$shift$$constant & 31;
14540     if (sh >= 16) {
14541       __ eor(as_FloatRegister($dst$$reg), __ T8B,
14542              as_FloatRegister($src$$reg),
14543              as_FloatRegister($src$$reg));
14544     } else {
14545       __ shl(as_FloatRegister($dst$$reg), __ T4H,
14546              as_FloatRegister($src$$reg), sh);
14547     }
14548   %}
14549   ins_pipe(pipe_class_default);
14550 %}
14551 
14552 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
14553   predicate(n->as_Vector()->length() == 8);
14554   match(Set dst (LShiftVS src shift));
14555   ins_cost(INSN_COST);
14556   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
14557   ins_encode %{
14558     int sh = (int)$shift$$constant & 31;
14559     if (sh >= 16) {
14560       __ eor(as_FloatRegister($dst$$reg), __ T16B,
14561              as_FloatRegister($src$$reg),
14562              as_FloatRegister($src$$reg));
14563     } else {
14564       __ shl(as_FloatRegister($dst$$reg), __ T8H,
14565              as_FloatRegister($src$$reg), sh);
14566     }
14567   %}
14568   ins_pipe(pipe_class_default);
14569 %}
14570 
14571 instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
14572   predicate(n->as_Vector()->length() == 2 ||
14573             n->as_Vector()->length() == 4);
14574   match(Set dst (RShiftVS src shift));
14575   ins_cost(INSN_COST);
14576   format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
14577   ins_encode %{
14578     int sh = (int)$shift$$constant & 31;
14579     if (sh >= 16) sh = 15;
14580     sh = -sh & 15;
14581     __ sshr(as_FloatRegister($dst$$reg), __ T4H,
14582            as_FloatRegister($src$$reg), sh);
14583   %}
14584   ins_pipe(pipe_class_default);
14585 %}
14586 
14587 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
14588   predicate(n->as_Vector()->length() == 8);
14589   match(Set dst (RShiftVS src shift));
14590   ins_cost(INSN_COST);
14591   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
14592   ins_encode %{
14593     int sh = (int)$shift$$constant & 31;
14594     if (sh >= 16) sh = 15;
14595     sh = -sh & 15;
14596     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
14597            as_FloatRegister($src$$reg), sh);
14598   %}
14599   ins_pipe(pipe_class_default);
14600 %}
14601 
14602 instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
14603   predicate(n->as_Vector()->length() == 2 ||
14604             n->as_Vector()->length() == 4);
14605   match(Set dst (URShiftVS src shift));
14606   ins_cost(INSN_COST);
14607   format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
14608   ins_encode %{
14609     int sh = (int)$shift$$constant & 31;
14610     if (sh >= 16) {
14611       __ eor(as_FloatRegister($dst$$reg), __ T8B,
14612              as_FloatRegister($src$$reg),
14613              as_FloatRegister($src$$reg));
14614     } else {
14615       __ ushr(as_FloatRegister($dst$$reg), __ T4H,
14616              as_FloatRegister($src$$reg), -sh & 15);
14617     }
14618   %}
14619   ins_pipe(pipe_class_default);
14620 %}
14621 
14622 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
14623   predicate(n->as_Vector()->length() == 8);
14624   match(Set dst (URShiftVS src shift));
14625   ins_cost(INSN_COST);
14626   format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
14627   ins_encode %{
14628     int sh = (int)$shift$$constant & 31;
14629     if (sh >= 16) {
14630       __ eor(as_FloatRegister($dst$$reg), __ T16B,
14631              as_FloatRegister($src$$reg),
14632              as_FloatRegister($src$$reg));
14633     } else {
14634       __ ushr(as_FloatRegister($dst$$reg), __ T8H,
14635              as_FloatRegister($src$$reg), -sh & 15);
14636     }
14637   %}
14638   ins_pipe(pipe_class_default);
14639 %}
14640 
14641 instruct vsll2I(vecD dst, vecD src, vecX shift) %{
14642   predicate(n->as_Vector()->length() == 2);
14643   match(Set dst (LShiftVI src shift));
14644   match(Set dst (RShiftVI src shift));
14645   ins_cost(INSN_COST);
14646   format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
14647   ins_encode %{
14648     __ sshl(as_FloatRegister($dst$$reg), __ T2S,
14649             as_FloatRegister($src$$reg),
14650             as_FloatRegister($shift$$reg));
14651   %}
14652   ins_pipe(pipe_class_default);
14653 %}
14654 
14655 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
14656   predicate(n->as_Vector()->length() == 4);
14657   match(Set dst (LShiftVI src shift));
14658   match(Set dst (RShiftVI src shift));
14659   ins_cost(INSN_COST);
14660   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
14661   ins_encode %{
14662     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
14663             as_FloatRegister($src$$reg),
14664             as_FloatRegister($shift$$reg));
14665   %}
14666   ins_pipe(pipe_class_default);
14667 %}
14668 
14669 instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
14670   predicate(n->as_Vector()->length() == 2);
14671   match(Set dst (URShiftVI src shift));
14672   ins_cost(INSN_COST);
14673   format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
14674   ins_encode %{
14675     __ ushl(as_FloatRegister($dst$$reg), __ T2S,
14676             as_FloatRegister($src$$reg),
14677             as_FloatRegister($shift$$reg));
14678   %}
14679   ins_pipe(pipe_class_default);
14680 %}
14681 
14682 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
14683   predicate(n->as_Vector()->length() == 4);
14684   match(Set dst (URShiftVI src shift));
14685   ins_cost(INSN_COST);
14686   format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
14687   ins_encode %{
14688     __ ushl(as_FloatRegister($dst$$reg), __ T4S,
14689             as_FloatRegister($src$$reg),
14690             as_FloatRegister($shift$$reg));
14691   %}
14692   ins_pipe(pipe_class_default);
14693 %}
14694 
14695 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
14696   predicate(n->as_Vector()->length() == 2);
14697   match(Set dst (LShiftVI src shift));
14698   ins_cost(INSN_COST);
14699   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
14700   ins_encode %{
14701     __ shl(as_FloatRegister($dst$$reg), __ T2S,
14702            as_FloatRegister($src$$reg),
14703            (int)$shift$$constant & 31);
14704   %}
14705   ins_pipe(pipe_class_default);
14706 %}
14707 
14708 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
14709   predicate(n->as_Vector()->length() == 4);
14710   match(Set dst (LShiftVI src shift));
14711   ins_cost(INSN_COST);
14712   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
14713   ins_encode %{
14714     __ shl(as_FloatRegister($dst$$reg), __ T4S,
14715            as_FloatRegister($src$$reg),
14716            (int)$shift$$constant & 31);
14717   %}
14718   ins_pipe(pipe_class_default);
14719 %}
14720 
14721 instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
14722   predicate(n->as_Vector()->length() == 2);
14723   match(Set dst (RShiftVI src shift));
14724   ins_cost(INSN_COST);
14725   format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
14726   ins_encode %{
14727     __ sshr(as_FloatRegister($dst$$reg), __ T2S,
14728             as_FloatRegister($src$$reg),
14729             -(int)$shift$$constant & 31);
14730   %}
14731   ins_pipe(pipe_class_default);
14732 %}
14733 
14734 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
14735   predicate(n->as_Vector()->length() == 4);
14736   match(Set dst (RShiftVI src shift));
14737   ins_cost(INSN_COST);
14738   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
14739   ins_encode %{
14740     __ sshr(as_FloatRegister($dst$$reg), __ T4S,
14741             as_FloatRegister($src$$reg),
14742             -(int)$shift$$constant & 31);
14743   %}
14744   ins_pipe(pipe_class_default);
14745 %}
14746 
14747 instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
14748   predicate(n->as_Vector()->length() == 2);
14749   match(Set dst (URShiftVI src shift));
14750   ins_cost(INSN_COST);
14751   format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
14752   ins_encode %{
14753     __ ushr(as_FloatRegister($dst$$reg), __ T2S,
14754             as_FloatRegister($src$$reg),
14755             -(int)$shift$$constant & 31);
14756   %}
14757   ins_pipe(pipe_class_default);
14758 %}
14759 
14760 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
14761   predicate(n->as_Vector()->length() == 4);
14762   match(Set dst (URShiftVI src shift));
14763   ins_cost(INSN_COST);
14764   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
14765   ins_encode %{
14766     __ ushr(as_FloatRegister($dst$$reg), __ T4S,
14767             as_FloatRegister($src$$reg),
14768             -(int)$shift$$constant & 31);
14769   %}
14770   ins_pipe(pipe_class_default);
14771 %}
14772 
14773 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
14774   predicate(n->as_Vector()->length() == 2);
14775   match(Set dst (LShiftVL src shift));
14776   match(Set dst (RShiftVL src shift));
14777   ins_cost(INSN_COST);
14778   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
14779   ins_encode %{
14780     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
14781             as_FloatRegister($src$$reg),
14782             as_FloatRegister($shift$$reg));
14783   %}
14784   ins_pipe(pipe_class_default);
14785 %}
14786 
14787 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
14788   predicate(n->as_Vector()->length() == 2);
14789   match(Set dst (URShiftVL src shift));
14790   ins_cost(INSN_COST);
14791   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
14792   ins_encode %{
14793     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
14794             as_FloatRegister($src$$reg),
14795             as_FloatRegister($shift$$reg));
14796   %}
14797   ins_pipe(pipe_class_default);
14798 %}
14799 
14800 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
14801   predicate(n->as_Vector()->length() == 2);
14802   match(Set dst (LShiftVL src shift));
14803   ins_cost(INSN_COST);
14804   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
14805   ins_encode %{
14806     __ shl(as_FloatRegister($dst$$reg), __ T2D,
14807            as_FloatRegister($src$$reg),
14808            (int)$shift$$constant & 63);
14809   %}
14810   ins_pipe(pipe_class_default);
14811 %}
14812 
14813 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
14814   predicate(n->as_Vector()->length() == 2);
14815   match(Set dst (RShiftVL src shift));
14816   ins_cost(INSN_COST);
14817   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
14818   ins_encode %{
14819     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
14820             as_FloatRegister($src$$reg),
14821             -(int)$shift$$constant & 63);
14822   %}
14823   ins_pipe(pipe_class_default);
14824 %}
14825 
14826 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
14827   predicate(n->as_Vector()->length() == 2);
14828   match(Set dst (URShiftVL src shift));
14829   ins_cost(INSN_COST);
14830   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
14831   ins_encode %{
14832     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
14833             as_FloatRegister($src$$reg),
14834             -(int)$shift$$constant & 63);
14835   %}
14836   ins_pipe(pipe_class_default);
14837 %}
14838 
14839 //----------PEEPHOLE RULES-----------------------------------------------------
14840 // These must follow all instruction definitions as they use the names
14841 // defined in the instructions definitions.
14842 //
14843 // peepmatch ( root_instr_name [preceding_instruction]* );
14844 //
14845 // peepconstraint %{
14846 // (instruction_number.operand_name relational_op instruction_number.operand_name
14847 //  [, ...] );
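//
// peepreplace names the single replacement instruction and the operands it
// takes, again given as instruction_number.operand_name references.
//
// For illustration only, a complete rule has the shape below; the instruct
// names are hypothetical placeholders, not rules defined in this file:
//
// peephole %{
//   peepmatch ( incI_reg movI_reg );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_reg_immI( 0.dst 1.src 0.src ) );
// %}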

