
src/cpu/aarch64/vm/aarch64.ad

rev 8393 : 8079565: aarch64: Add vectorization support for aarch64
Summary: Add vectorization support
Reviewed-by: duke


 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
 156 // can store a vector of up to 4 single-precision (32-bit) floats or
 157 // 2 double-precision (64-bit) floats.  We currently only use the
 158 // first float or double element of the vector.
 159 
 160 // For Java use, float registers v0-v15 are always save-on-call (whereas
 161 // the platform ABI treats v8-v15 as callee-save).  Float registers
 162 // v16-v31 are save-on-call as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()         );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next() );
 166   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()         );
 167   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next() );
 168   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()         );
 169   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next() );
 170   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()         );
 171   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next() );
 172   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()         );
 173   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next() );
 174   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()         );
 175   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next() );
 176   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()         );
 177   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next() );
 178   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()         );
 179   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next() );
 180   reg_def V8   ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()         );
 181   reg_def V8_H ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()->next() );
 182   reg_def V9   ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()         );
 183   reg_def V9_H ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()->next() );
 184   reg_def V10  ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()        );
 185   reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next());
 186   reg_def V11  ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()        );
 187   reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next());
 188   reg_def V12  ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()        );
 189   reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next());
 190   reg_def V13  ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()        );
 191   reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next());
 192   reg_def V14  ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()        );
 193   reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next());
 194   reg_def V15  ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()        );
 195   reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next());
 196   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()        );
 197   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next());
 198   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()        );
 199   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next());
 200   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()        );
 201   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next());
 202   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()        );
 203   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next());
 204   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()        );
 205   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next());
 206   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()        );
 207   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next());
 208   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()        );
 209   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next());
 210   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()        );
 211   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next());
 212   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()        );
 213   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next());
 214   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()        );
 215   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next());
 216   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()        );
 217   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next());
 218   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()        );
 219   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next());
 220   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()        );
 221   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next());
 222   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()        );
 223   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next());
 224   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()        );
 225   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next());
 226   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()        );
 227   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next());
 228 
 229 // ----------------------------
 230 // Special Registers
 231 // ----------------------------
 232 
 233 // The AArch64 CPSR status flag register is not directly accessible as
 234 // an instruction operand.  The FPSR status flag register is a system
 235 // register which can be written/read using MSR/MRS but again does not
 236 // appear as an operand (a code identifying the FPSR occurs as an
 237 // immediate value in the instruction).
 238 
 239 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 240 
 241 
 242 // Specify priority of register selection within phases of register
 243 // allocation.  Highest priority is first.  A useful heuristic is to
 244 // give registers a low priority when they are required by machine
 245 // instructions, like EAX and EDX on I486, and choose no-save registers
 246 // before save-on-call, & save-on-call before save-on-entry.  Registers
 247 // which participate in fixed calling sequences should come last.


 274     R20, R20_H,
 275     R21, R21_H,
 276     R22, R22_H,
 277     R23, R23_H,
 278     R24, R24_H,
 279     R25, R25_H,
 280     R26, R26_H,
 281 
 282     // non-allocatable registers
 283 
 284     R27, R27_H, // heapbase
 285     R28, R28_H, // thread
 286     R29, R29_H, // fp
 287     R30, R30_H, // lr
 288     R31, R31_H, // sp
 289 );
 290 
 291 alloc_class chunk1(
 292 
 293     // no save
 294     V16, V16_H,
 295     V17, V17_H,
 296     V18, V18_H,
 297     V19, V19_H,
 298     V20, V20_H,
 299     V21, V21_H,
 300     V22, V22_H,
 301     V23, V23_H,
 302     V24, V24_H,
 303     V25, V25_H,
 304     V26, V26_H,
 305     V27, V27_H,
 306     V28, V28_H,
 307     V29, V29_H,
 308     V30, V30_H,
 309     V31, V31_H,
 310 
 311     // arg registers
 312     V0, V0_H,
 313     V1, V1_H,
 314     V2, V2_H,
 315     V3, V3_H,
 316     V4, V4_H,
 317     V5, V5_H,
 318     V6, V6_H,
 319     V7, V7_H,
 320 
 321     // non-volatiles
 322     V8, V8_H,
 323     V9, V9_H,
 324     V10, V10_H,
 325     V11, V11_H,
 326     V12, V12_H,
 327     V13, V13_H,
 328     V14, V14_H,
 329     V15, V15_H,
 330 );
 331 
 332 alloc_class chunk2(RFLAGS);
 333 
 334 //----------Architecture Description Register Classes--------------------------
 335 // Several register classes are automatically defined based upon information in
 336 // this architecture description.
 337 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 338 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 339 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 340 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 341 //
 342 
 343 // Class for all 32 bit integer registers -- excludes SP which will
 344 // never be used as an integer register
 345 reg_class any_reg32(
 346     R0,
 347     R1,
 348     R2,
 349     R3,


 753     V14, V14_H,
 754     V15, V15_H,
 755     V16, V16_H,
 756     V17, V17_H,
 757     V18, V18_H,
 758     V19, V19_H,
 759     V20, V20_H,
 760     V21, V21_H,
 761     V22, V22_H,
 762     V23, V23_H,
 763     V24, V24_H,
 764     V25, V25_H,
 765     V26, V26_H,
 766     V27, V27_H,
 767     V28, V28_H,
 768     V29, V29_H,
 769     V30, V30_H,
 770     V31, V31_H
 771 );
 772 
 773 // Class for 128 bit register v0
 774 reg_class v0_reg(
 775     V0, V0_H
 776 );
 777 
 778 // Class for 128 bit register v1
 779 reg_class v1_reg(
 780     V1, V1_H
 781 );
 782 
 783 // Class for 128 bit register v2
 784 reg_class v2_reg(
 785     V2, V2_H
 786 );
 787 
 788 // Class for 128 bit register v3
 789 reg_class v3_reg(
 790     V3, V3_H
 791 );
 792 


1947 //=============================================================================
1948 
1949 // Figure out which register class each belongs in: rc_int, rc_float or
1950 // rc_stack.
1951 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1952 
1953 static enum RC rc_class(OptoReg::Name reg) {
1954 
1955   if (reg == OptoReg::Bad) {
1956     return rc_bad;
1957   }
1958 
1959   // we have 30 int registers * 2 halves
1960   // (rscratch1 and rscratch2 are omitted)
1961 
1962   if (reg < 60) {
1963     return rc_int;
1964   }
1965 
1966   // we have 32 float registers * 2 halves
1967   if (reg < 60 + 64) {
1968     return rc_float;
1969   }
1970 
1971   // Between the float regs and the stack is the flags register.
1972   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1973 
1974   return rc_stack;
1975 }
1976 
1977 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1978   Compile* C = ra_->C;
1979 
1980   // Get registers to move.
1981   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1982   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1983   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1984   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1985 
1986   enum RC src_hi_rc = rc_class(src_hi);
1987   enum RC src_lo_rc = rc_class(src_lo);
1988   enum RC dst_hi_rc = rc_class(dst_hi);
1989   enum RC dst_lo_rc = rc_class(dst_lo);
1990 
1991   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1992 
1993   if (src_hi != OptoReg::Bad) {
1994     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1995            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1996            "expected aligned-adjacent pairs");
1997   }
1998 
1999   if (src_lo == dst_lo && src_hi == dst_hi) {
2000     return 0;            // Self copy, no move.
2001   }
2002 
2003   switch (src_lo_rc) {
2004   case rc_int:
2005     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2006       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2007           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2008           // 64 bit
2009         if (cbuf) {
2010           MacroAssembler _masm(cbuf);
2011           __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2012                  as_Register(Matcher::_regEncode[src_lo]));
2013         } else if (st) {
2014           st->print("mov  %s, %s\t# shuffle",
2015                     Matcher::regName[dst_lo],
2016                     Matcher::regName[src_lo]);
2017         }
2018       } else {
2019         // 32 bit
2020         if (cbuf) {
2021           MacroAssembler _masm(cbuf);
2022           __ movw(as_Register(Matcher::_regEncode[dst_lo]),


2405 
2406 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
2407 {
2408   Unimplemented();
2409   return false;
2410 }
2411 
2412 const bool Matcher::isSimpleConstant64(jlong value) {
2413   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
2414   // Probably always true, even if a temp register is required.
2415   return true;
2416 }
2417 
2418 // true just means we have fast l2f conversion
2419 const bool Matcher::convL2FSupported(void) {
2420   return true;
2421 }
2422 
2423 // Vector width in bytes.
2424 const int Matcher::vector_width_in_bytes(BasicType bt) {
2425   // TODO fixme
2426   return 0;
2427 }
2428 
2429 // Limits on vector size (number of elements) loaded into vector.
2430 const int Matcher::max_vector_size(const BasicType bt) {
2431   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2432 }
2433 const int Matcher::min_vector_size(const BasicType bt) {
2434   int max_size = max_vector_size(bt);
2435   // Min size which can be loaded into vector is 4 bytes.
2436   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
2437   return MIN2(size,max_size);
2438 }
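
A quick standalone sketch (plain C++, not HotSpot code; names are
illustrative) of what these pre-patch stubs imply: with a reported width
of 0, max_vector_size and min_vector_size both collapse to 0, so the
matcher never sees a usable vector size and vectorization stays disabled.

    #include <algorithm>
    #include <cstdio>

    static int width_in_bytes()         { return 0; }  // the TODO stub above
    static int max_size(int elem_bytes) { return width_in_bytes() / elem_bytes; }

    static int min_size(int elem_bytes) {
      int size = (elem_bytes == 1) ? 4 : 2;
      return std::min(size, max_size(elem_bytes));      // MIN2(size, 0) == 0
    }

    int main() {
      printf("max=%d min=%d\n", max_size(4), min_size(4));  // prints: max=0 min=0
    }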
2439 
2440 // Vector ideal reg.
2441 const int Matcher::vector_ideal_reg(int len) {
2442   // TODO fixme
2443   return Op_RegD;
2444 }
2445 
2446 // Only the lowest bits of the vector register are used for the vector shift count.
2447 const int Matcher::vector_shift_count_ideal_reg(int size) {
2448   // TODO fixme
2449   return Op_RegL;
2450 }
2451 
2452 // AES support not yet implemented
2453 const bool Matcher::pass_original_key_for_aes() {
2454   return false;
2455 }
2456 
2457 // Does the target support misaligned vector store/load?
2458 const bool Matcher::misaligned_vectors_ok() {
2459   // TODO fixme
2460   // return !AlignVector; // can be changed by flag
2461   return false;
2462 }
2463 
2464 // false => size gets scaled to BytesPerLong, ok.
2465 const bool Matcher::init_array_count_is_in_bytes = false;
2466 
2467 // Threshold size for cleararray.
2468 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2469 


2640   case T_FLOAT:
2641     rtype = MacroAssembler::ret_type_float;
2642     break;
2643   case T_DOUBLE:
2644     rtype = MacroAssembler::ret_type_double;
2645     break;
2646   }
2647 }
2648 
2649 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
2650   MacroAssembler _masm(&cbuf);                                          \
2651   {                                                                     \
2652     guarantee(INDEX == -1, "mode not permitted for volatile");          \
2653     guarantee(DISP == 0, "mode not permitted for volatile");            \
2654     guarantee(SCALE == 0, "mode not permitted for volatile");           \
2655     __ INSN(REG, as_Register(BASE));                                    \
2656   }
2657 
2658 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2659 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2660 
2661   // Used for all non-volatile memory accesses.  The use of
2662   // $mem->opcode() to discover whether this pattern uses sign-extended
2663   // offsets is something of a kludge.
2664   static void loadStore(MacroAssembler masm, mem_insn insn,
2665                          Register reg, int opcode,
2666                          Register base, int index, int size, int disp)
2667   {
2668     Address::extend scale;
2669 
2670     // Hooboy, this is fugly.  We need a way to communicate to the
2671     // encoder that the index needs to be sign extended, so we have to
2672     // enumerate all the cases.
2673     switch (opcode) {
2674     case INDINDEXSCALEDOFFSETI2L:
2675     case INDINDEXSCALEDI2L:
2676     case INDINDEXSCALEDOFFSETI2LN:
2677     case INDINDEXSCALEDI2LN:
2678     case INDINDEXOFFSETI2L:
2679     case INDINDEXOFFSETI2LN:


2707     case INDINDEXSCALEDOFFSETI2LN:
2708     case INDINDEXSCALEDI2LN:
2709       scale = Address::sxtw(size);
2710       break;
2711     default:
2712       scale = Address::lsl(size);
2713     }
2714 
2715     if (index == -1) {
2716       (masm.*insn)(reg, Address(base, disp));
2717     } else {
2718       if (disp == 0) {
2719         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2720       } else {
2721         masm.lea(rscratch1, Address(base, disp));
2722         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2723       }
2724     }
2725   }
2726 
2727 %}
2728 
2729 
2730 
2731 //----------ENCODING BLOCK-----------------------------------------------------
2732 // This block specifies the encoding classes used by the compiler to
2733 // output byte streams.  Encoding classes are parameterized macros
2734 // used by Machine Instruction Nodes in order to generate the bit
2735 // encoding of the instruction.  Operands specify their base encoding
2736 // interface with the interface keyword.  There are currently
2737 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2738 // COND_INTER.  REG_INTER causes an operand to generate a function
2739 // which returns its register number when queried.  CONST_INTER causes
2740 // an operand to generate a function which returns the value of the
2741 // constant when queried.  MEMORY_INTER causes an operand to generate
2742 // four functions which return the Base Register, the Index Register,
2743 // the Scale Value, and the Offset Value of the operand when queried.
2744 // COND_INTER causes an operand to generate six functions which return
2745 // the encoding code (ie - encoding bits for the instruction)
2746 // associated with each basic boolean condition for a conditional


2838   %}
2839 
2840   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
2841     Register dst_reg = as_Register($dst$$reg);
2842     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
2843                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2844   %}
2845 
2846   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
2847     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2848     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
2849                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2850   %}
2851 
2852   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
2853     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2854     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
2855                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2856   %}
2857 
2858   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
2859     Register src_reg = as_Register($src$$reg);
2860     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
2861                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2862   %}
2863 
2864   enc_class aarch64_enc_strb0(memory mem) %{
2865     MacroAssembler _masm(&cbuf);
2866     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2867                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2868   %}
2869 
2870   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
2871     Register src_reg = as_Register($src$$reg);
2872     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
2873                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2874   %}
2875 
2876   enc_class aarch64_enc_strh0(memory mem) %{
2877     MacroAssembler _masm(&cbuf);


2906   %}
2907 
2908   enc_class aarch64_enc_str0(memory mem) %{
2909     MacroAssembler _masm(&cbuf);
2910     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
2911                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2912   %}
2913 
2914   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
2915     FloatRegister src_reg = as_FloatRegister($src$$reg);
2916     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
2917                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2918   %}
2919 
2920   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
2921     FloatRegister src_reg = as_FloatRegister($src$$reg);
2922     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
2923                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2924   %}
2925 
2926   // END Non-volatile memory access
2927 
2928   // volatile loads and stores
2929 
2930   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
2931     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2932                  rscratch1, stlrb);
2933   %}
2934 
2935   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
2936     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2937                  rscratch1, stlrh);
2938   %}
2939 
2940   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
2941     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2942                  rscratch1, stlrw);
2943   %}
2944 
2945 


4916   constraint(ALLOC_IN_RC(float_reg));
4917   match(RegF);
4918 
4919   op_cost(0);
4920   format %{ %}
4921   interface(REG_INTER);
4922 %}
4923 
4924 // Double Register
4925 // Double register operands
4926 operand vRegD()
4927 %{
4928   constraint(ALLOC_IN_RC(double_reg));
4929   match(RegD);
4930 
4931   op_cost(0);
4932   format %{ %}
4933   interface(REG_INTER);
4934 %}
4935 
4936 operand vRegD_V0()
4937 %{
4938   constraint(ALLOC_IN_RC(v0_reg));
4939   match(RegD);
4940   op_cost(0);
4941   format %{ %}
4942   interface(REG_INTER);
4943 %}
4944 
4945 operand vRegD_V1()
4946 %{
4947   constraint(ALLOC_IN_RC(v1_reg));
4948   match(RegD);
4949   op_cost(0);
4950   format %{ %}
4951   interface(REG_INTER);
4952 %}
4953 
4954 operand vRegD_V2()
4955 %{


5488     less_equal(0x9, "ls");
5489     greater(0x8, "hi");
5490     overflow(0x6, "vs");
5491     no_overflow(0x7, "vc");
5492   %}
5493 %}
5494 
5495 // Special operand allowing long args to int ops to be truncated for free
5496 
5497 operand iRegL2I(iRegL reg) %{
5498 
5499   op_cost(0);
5500 
5501   match(ConvL2I reg);
5502 
5503   format %{ "l2i($reg)" %}
5504 
5505   interface(REG_INTER)
5506 %}
5507 
5508 
5509 //----------OPERAND CLASSES----------------------------------------------------
5510 // Operand Classes are groups of operands that are used to simplify
5511 // instruction definitions by not requiring the AD writer to specify
5512 // separate instructions for every form of operand when the
5513 // instruction accepts multiple operand types with the same basic
5514 // encoding and format. The classic case of this is memory operands.
5515 
5516 // memory is used to define the read/write location for load/store
5517 // instruction defs.  We can turn a memory op into an Address.
5518 
5519 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5520                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5521 
5522 
5523 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5524 // operations.  It allows the src to be either an iRegI or a (ConvL2I
5525 // iRegL).  In the latter case the l2i normally planted for a ConvL2I
5526 // can be elided because the 32-bit instruction will just employ the
5527 // lower 32 bits anyway.


12909 
12910 // ============================================================================
12911 // This name is KNOWN by the ADLC and cannot be changed.
12912 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12913 // for this guy.
12914 instruct tlsLoadP(thread_RegP dst)
12915 %{
12916   match(Set dst (ThreadLocal));
12917 
12918   ins_cost(0);
12919 
12920   format %{ " -- \t// $dst=Thread::current(), empty" %}
12921 
12922   size(0);
12923 
12924   ins_encode( /*empty*/ );
12925 
12926   ins_pipe(pipe_class_empty);
12927 %}
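
// No code is emitted for this instruct: the current thread already lives in
// a dedicated register (r28 is labelled "thread" in chunk0 above), so
// ThreadLocal matches to an empty, zero-size node.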
12928 
12929 
12930 
12931 //----------PEEPHOLE RULES-----------------------------------------------------
12932 // These must follow all instruction definitions as they use the names
12933 // defined in the instructions definitions.
12934 //
12935 // peepmatch ( root_instr_name [preceding_instruction]* );
12936 //
12937 // peepconstraint %{
12938 // (instruction_number.operand_name relational_op instruction_number.operand_name
12939 //  [, ...] );
12940 // // instruction numbers are zero-based using left to right order in peepmatch
12941 //
12942 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12943 // // provide an instruction_number.operand_name for each operand that appears
12944 // // in the replacement instruction's match rule
12945 //
12946 // ---------VM FLAGS---------------------------------------------------------
12947 //
12948 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12949 //




 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
 156 // can store a vector of up to 4 single-precision (32-bit) floats or
 157 // 2 double-precision (64-bit) floats.  We currently only use the
 158 // first float or double element of the vector.
 159 
 160 // For Java use, float registers v0-v15 are always save-on-call (whereas
 161 // the platform ABI treats v8-v15 as callee-save).  Float registers
 162 // v16-v31 are save-on-call as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
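// With the _J and _K slots added above, each of the 32 SIMD registers now
// occupies four 32-bit ADLC slots (32 * 4 = 128 OptoReg names in all).
// That is the 60 + 128 boundary rc_class() relies on further down, and it
// is what lets a 128-bit VecX value span a whole register.
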
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // The AArch64 CPSR status flag register is not directly accessible as
 329 // an instruction operand.  The FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.


 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,


 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 128bit vector registers
 869 reg_class vectorx_reg(
 870     V0, V0_H, V0_J, V0_K,
 871     V1, V1_H, V1_J, V1_K,
 872     V2, V2_H, V2_J, V2_K,
 873     V3, V3_H, V3_J, V3_K,
 874     V4, V4_H, V4_J, V4_K,
 875     V5, V5_H, V5_J, V5_K,
 876     V6, V6_H, V6_J, V6_K,
 877     V7, V7_H, V7_J, V7_K,
 878     V8, V8_H, V8_J, V8_K,
 879     V9, V9_H, V9_J, V9_K,
 880     V10, V10_H, V10_J, V10_K,
 881     V11, V11_H, V11_J, V11_K,
 882     V12, V12_H, V12_J, V12_K,
 883     V13, V13_H, V13_J, V13_K,
 884     V14, V14_H, V14_J, V14_K,
 885     V15, V15_H, V15_J, V15_K,
 886     V16, V16_H, V16_J, V16_K,
 887     V17, V17_H, V17_J, V17_K,
 888     V18, V18_H, V18_J, V18_K,
 889     V19, V19_H, V19_J, V19_K,
 890     V20, V20_H, V20_J, V20_K,
 891     V21, V21_H, V21_J, V21_K,
 892     V22, V22_H, V22_J, V22_K,
 893     V23, V23_H, V23_J, V23_K,
 894     V24, V24_H, V24_J, V24_K,
 895     V25, V25_H, V25_J, V25_K,
 896     V26, V26_H, V26_J, V26_K,
 897     V27, V27_H, V27_J, V27_K,
 898     V28, V28_H, V28_J, V28_K,
 899     V29, V29_H, V29_J, V29_K,
 900     V30, V30_H, V30_J, V30_K,
 901     V31, V31_H, V31_J, V31_K
 902 );
 903 
 904 // Class for 128 bit register v0
 905 reg_class v0_reg(
 906     V0, V0_H
 907 );
 908 
 909 // Class for 128 bit register v1
 910 reg_class v1_reg(
 911     V1, V1_H
 912 );
 913 
 914 // Class for 128 bit register v2
 915 reg_class v2_reg(
 916     V2, V2_H
 917 );
 918 
 919 // Class for 128 bit register v3
 920 reg_class v3_reg(
 921     V3, V3_H
 922 );
 923 


2078 //=============================================================================
2079 
2080 // Figure out which register class each belongs in: rc_int, rc_float or
2081 // rc_stack.
2082 enum RC { rc_bad, rc_int, rc_float, rc_stack };
2083 
2084 static enum RC rc_class(OptoReg::Name reg) {
2085 
2086   if (reg == OptoReg::Bad) {
2087     return rc_bad;
2088   }
2089 
2090   // we have 30 int registers * 2 halves
2091   // (rscratch1 and rscratch2 are omitted)
2092 
2093   if (reg < 60) {
2094     return rc_int;
2095   }
2096 
2097   // we have 32 float registers * 4 slots each (V, _H, _J, _K)
2098   if (reg < 60 + 128) {
2099     return rc_float;
2100   }
2101 
2102   // Between the float regs and the stack is the flags register.
2103   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
2104 
2105   return rc_stack;
2106 }
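
As a minimal standalone sketch (plain C++, not HotSpot code) of the OptoReg
layout this function now assumes: 30 allocatable int registers at two 32-bit
halves each, followed by 32 SIMD registers at four slots each.

    #include <cstdio>

    enum RC { rc_bad, rc_int, rc_float, rc_stack };

    static RC rc_class_sketch(int reg) {
      if (reg < 0)        return rc_bad;
      if (reg < 60)       return rc_int;    // 30 int registers * 2 halves
      if (reg < 60 + 128) return rc_float;  // 32 SIMD registers * 4 slots each
      return rc_stack;                      // flags register, then stack slots
    }

    int main() {
      printf("%d %d %d %d\n",
             rc_class_sketch(59),    // 1 (rc_int, last int half)
             rc_class_sketch(60),    // 2 (rc_float, first SIMD slot)
             rc_class_sketch(187),   // 2 (rc_float, last SIMD slot)
             rc_class_sketch(188));  // 3 (rc_stack)
    }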
2107 
2108 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2109   Compile* C = ra_->C;
2110 
2111   // Get registers to move.
2112   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2113   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2114   OptoReg::Name dst_hi = ra_->get_reg_second(this);
2115   OptoReg::Name dst_lo = ra_->get_reg_first(this);
2116 
2117   enum RC src_hi_rc = rc_class(src_hi);
2118   enum RC src_lo_rc = rc_class(src_lo);
2119   enum RC dst_hi_rc = rc_class(dst_hi);
2120   enum RC dst_lo_rc = rc_class(dst_lo);
2121 
2122   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2123 
2124   if (src_hi != OptoReg::Bad) {
2125     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2126            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2127            "expected aligned-adjacent pairs");
2128   }
2129 
2130   if (src_lo == dst_lo && src_hi == dst_hi) {
2131     return 0;            // Self copy, no move.
2132   }
2133 
2134   if (bottom_type()->isa_vect() != NULL) {
2135     uint len = 4;
2136     if (cbuf) {
2137       MacroAssembler _masm(cbuf);
2138       uint ireg = ideal_reg();
2139       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2140       assert(ireg == Op_VecX, "sanity");
2141       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2142         // stack->stack
2143         int src_offset = ra_->reg2offset(src_lo);
2144         int dst_offset = ra_->reg2offset(dst_lo);
2145         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
2146         len = 8;
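          // ldp/stp on 64-bit registers encode a signed imm7 offset scaled
          // by 8 (range -512..504), so offsets of 512 and up fall back to a
          // pair of single loads/stores below.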
2147         if (src_offset < 512) {
2148           __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2149         } else {
2150           __ ldr(rscratch1, Address(sp, src_offset));
2151           __ ldr(rscratch2, Address(sp, src_offset+8));
2152           len += 4;
2153         }
2154         if (dst_offset < 512) {
2155           __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2156         } else {
2157           __ str(rscratch1, Address(sp, dst_offset));
2158           __ str(rscratch2, Address(sp, dst_offset+8));
2159           len += 4;
2160         }
2161       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2162         __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B,
2163                as_FloatRegister(Matcher::_regEncode[src_lo]),
2164                as_FloatRegister(Matcher::_regEncode[src_lo]));
2165       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2166         __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q,
2167                Address(sp, ra_->reg2offset(dst_lo)));
2168       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2169         __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q,
2170                Address(sp, ra_->reg2offset(src_lo)));
2171       } else {
2172         ShouldNotReachHere();
2173       }
2174     } else if (st) {
2175       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2176         // stack->stack
2177         int src_offset = ra_->reg2offset(src_lo);
2178         int dst_offset = ra_->reg2offset(dst_lo);
2179         if (src_offset < 512) {
2180           st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
2181         } else {
2182           st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2183           st->print("\nldr  rscratch2, [sp, #%d]", src_offset+8);
2184         }
2185         if (dst_offset < 512) {
2186           st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
2187         } else {
2188           st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2189           st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+8);
2190         }
2191         st->print("\t# vector spill, stack to stack");
2192       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2193         st->print("mov  %s, %s\t# vector spill, reg to reg",
2194                    Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2195       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2196         st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
2197                    Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2198       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2199         st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
2200                    Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2201       }
2202     }
2203     return len;
2204   }
2205 
2206   switch (src_lo_rc) {
2207   case rc_int:
2208     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2209       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2210           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2211           // 64 bit
2212         if (cbuf) {
2213           MacroAssembler _masm(cbuf);
2214           __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2215                  as_Register(Matcher::_regEncode[src_lo]));
2216         } else if (st) {
2217           st->print("mov  %s, %s\t# shuffle",
2218                     Matcher::regName[dst_lo],
2219                     Matcher::regName[src_lo]);
2220         }
2221       } else {
2222         // 32 bit
2223         if (cbuf) {
2224           MacroAssembler _masm(cbuf);
2225           __ movw(as_Register(Matcher::_regEncode[dst_lo]),


2608 
2609 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
2610 {
2611   Unimplemented();
2612   return false;
2613 }
2614 
2615 const bool Matcher::isSimpleConstant64(jlong value) {
2616   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2617   // Probably always true, even if a temp register is required.
2618   return true;
2619 }
2620 
2621 // true just means we have fast l2f conversion
2622 const bool Matcher::convL2FSupported(void) {
2623   return true;
2624 }
2625 
2626 // Vector width in bytes.
2627 const int Matcher::vector_width_in_bytes(BasicType bt) {
2628   int size = MIN2(16,(int)MaxVectorSize);
2629   // Require at least 2 elements in the vector
2630   if (size < 2*type2aelembytes(bt)) size = 0;
2631   // and never a vector smaller than 4 bytes
2632   if (size < 4) size = 0;
2633   return size;
2634 }
2635 
2636 // Limits on vector size (number of elements) loaded into vector.
2637 const int Matcher::max_vector_size(const BasicType bt) {
2638   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2639 }
2640 const int Matcher::min_vector_size(const BasicType bt) {
2641   //return (type2aelembytes(bt) == 1) ? 4 : 2;
2642   // For the moment, only support 1 vector size, 128 bits
2643   return max_vector_size(bt);
2644 }
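
For concreteness, a small standalone sketch (plain C++, not HotSpot code;
the element sizes are the usual Java ones) of how these three hooks now
interact when MaxVectorSize is 16:

    #include <algorithm>
    #include <cstdio>

    static int elem_bytes(char bt) {  // 'b'yte, 's'hort, 'i'nt/'f'loat, 'l'ong/'d'ouble
      switch (bt) {
      case 'b': return 1;
      case 's': return 2;
      case 'i': case 'f': return 4;
      default:  return 8;
      }
    }

    static int width_in_bytes(char bt) {
      int size = std::min(16, 16 /* MaxVectorSize */);
      if (size < 2 * elem_bytes(bt)) size = 0;  // require at least 2 elements
      if (size < 4) size = 0;                   // and at least 4 bytes
      return size;
    }

    int main() {
      for (char bt : {'b', 's', 'i', 'l', 'f', 'd'}) {
        int w = width_in_bytes(bt);
        int lanes = (w == 0) ? 0 : w / elem_bytes(bt);
        printf("%c: width=%d bytes, max=min=%d elements\n", bt, w, lanes);
      }
    }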
2645 
2646 // Vector ideal reg.
2647 const int Matcher::vector_ideal_reg(int len) {
2648   return Op_VecX;
2649 }
2650 
2651 // Only lowest bits of xmm reg are used for vector shift count.
2652 const int Matcher::vector_shift_count_ideal_reg(int size) {
2653   return Op_VecX;
2654 }
2655 
2656 // AES support not yet implemented
2657 const bool Matcher::pass_original_key_for_aes() {
2658   return false;
2659 }
2660 
2661 // x86 supports misaligned vectors store/load.
2662 const bool Matcher::misaligned_vectors_ok() {
2663   // TODO fixme
2664   // return !AlignVector; // can be changed by flag
2665   return false;
2666 }
2667 
2668 // false => size gets scaled to BytesPerLong, ok.
2669 const bool Matcher::init_array_count_is_in_bytes = false;
2670 
2671 // Threshold size for cleararray.
2672 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2673 


2844   case T_FLOAT:
2845     rtype = MacroAssembler::ret_type_float;
2846     break;
2847   case T_DOUBLE:
2848     rtype = MacroAssembler::ret_type_double;
2849     break;
2850   }
2851 }
2852 
2853 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
2854   MacroAssembler _masm(&cbuf);                                          \
2855   {                                                                     \
2856     guarantee(INDEX == -1, "mode not permitted for volatile");          \
2857     guarantee(DISP == 0, "mode not permitted for volatile");            \
2858     guarantee(SCALE == 0, "mode not permitted for volatile");           \
2859     __ INSN(REG, as_Register(BASE));                                    \
2860   }
2861 
2862 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2863 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2864 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
2865                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
2866 
2867   // Used for all non-volatile memory accesses.  The use of
2868   // $mem->opcode() to discover whether this pattern uses sign-extended
2869   // offsets is something of a kludge.
2870   static void loadStore(MacroAssembler masm, mem_insn insn,
2871                          Register reg, int opcode,
2872                          Register base, int index, int size, int disp)
2873   {
2874     Address::extend scale;
2875 
2876     // Hooboy, this is fugly.  We need a way to communicate to the
2877     // encoder that the index needs to be sign extended, so we have to
2878     // enumerate all the cases.
2879     switch (opcode) {
2880     case INDINDEXSCALEDOFFSETI2L:
2881     case INDINDEXSCALEDI2L:
2882     case INDINDEXSCALEDOFFSETI2LN:
2883     case INDINDEXSCALEDI2LN:
2884     case INDINDEXOFFSETI2L:
2885     case INDINDEXOFFSETI2LN:


2913     case INDINDEXSCALEDOFFSETI2LN:
2914     case INDINDEXSCALEDI2LN:
2915       scale = Address::sxtw(size);
2916       break;
2917     default:
2918       scale = Address::lsl(size);
2919     }
2920 
2921     if (index == -1) {
2922       (masm.*insn)(reg, Address(base, disp));
2923     } else {
2924       if (disp == 0) {
2925         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2926       } else {
2927         masm.lea(rscratch1, Address(base, disp));
2928         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2929       }
2930     }
2931   }
2932 
2933   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2934                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2935                          int opcode, Register base, int index, int size, int disp)
2936   {
2937     if (index == -1) {
2938       (masm.*insn)(reg, T, Address(base, disp));
2939     } else {
2940       assert(disp == 0, "unsupported address mode");
2941       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2942     }
2943   }
2944 
2945 %}
2946 
2947 
2948 
2949 //----------ENCODING BLOCK-----------------------------------------------------
2950 // This block specifies the encoding classes used by the compiler to
2951 // output byte streams.  Encoding classes are parameterized macros
2952 // used by Machine Instruction Nodes in order to generate the bit
2953 // encoding of the instruction.  Operands specify their base encoding
2954 // interface with the interface keyword.  There are currently
2955 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2956 // COND_INTER.  REG_INTER causes an operand to generate a function
2957 // which returns its register number when queried.  CONST_INTER causes
2958 // an operand to generate a function which returns the value of the
2959 // constant when queried.  MEMORY_INTER causes an operand to generate
2960 // four functions which return the Base Register, the Index Register,
2961 // the Scale Value, and the Offset Value of the operand when queried.
2962 // COND_INTER causes an operand to generate six functions which return
2963 // the encoding code (ie - encoding bits for the instruction)
2964 // associated with each basic boolean condition for a conditional


3056   %}
3057 
3058   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3059     Register dst_reg = as_Register($dst$$reg);
3060     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3061                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3062   %}
3063 
3064   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3065     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3066     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3067                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3068   %}
3069 
3070   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3071     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3072     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3073                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3074   %}
3075 
3076   enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{
3077     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3078     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3079        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3080   %}
3081 
3082   enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{
3083     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3084     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3085        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3086   %}
3087 
3088   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3089     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3090     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3091        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3092   %}
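
  // The S, D and Q variants above select 32-, 64- and 128-bit SIMD accesses
  // respectively, so a vecX value can be moved at whichever width the
  // matched memory operation requires.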
3093 
3094   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3095     Register src_reg = as_Register($src$$reg);
3096     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3097                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3098   %}
3099 
3100   enc_class aarch64_enc_strb0(memory mem) %{
3101     MacroAssembler _masm(&cbuf);
3102     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3103                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3104   %}
3105 
3106   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
3107     Register src_reg = as_Register($src$$reg);
3108     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
3109                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3110   %}
3111 
3112   enc_class aarch64_enc_strh0(memory mem) %{
3113     MacroAssembler _masm(&cbuf);


3142   %}
3143 
3144   enc_class aarch64_enc_str0(memory mem) %{
3145     MacroAssembler _masm(&cbuf);
3146     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
3147                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3148   %}
3149 
3150   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
3151     FloatRegister src_reg = as_FloatRegister($src$$reg);
3152     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
3153                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3154   %}
3155 
3156   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
3157     FloatRegister src_reg = as_FloatRegister($src$$reg);
3158     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
3159                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3160   %}
3161 
3162   enc_class aarch64_enc_strvS(vecX src, memory mem) %{
3163     FloatRegister src_reg = as_FloatRegister($src$$reg);
3164     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
3165        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3166   %}
3167 
3168   enc_class aarch64_enc_strvD(vecX src, memory mem) %{
3169     FloatRegister src_reg = as_FloatRegister($src$$reg);
3170     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
3171        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3172   %}
3173 
3174   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
3175     FloatRegister src_reg = as_FloatRegister($src$$reg);
3176     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
3177        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3178   %}
3179 
3180   // END Non-volatile memory access
3181 
3182   // volatile loads and stores
3183 
3184   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
3185     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3186                  rscratch1, stlrb);
3187   %}
3188 
3189   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
3190     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3191                  rscratch1, stlrh);
3192   %}
3193 
3194   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
3195     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3196                  rscratch1, stlrw);
3197   %}
3198 
3199 


5170   constraint(ALLOC_IN_RC(float_reg));
5171   match(RegF);
5172 
5173   op_cost(0);
5174   format %{ %}
5175   interface(REG_INTER);
5176 %}
5177 
5178 // Double Register
5179 // Double register operands
5180 operand vRegD()
5181 %{
5182   constraint(ALLOC_IN_RC(double_reg));
5183   match(RegD);
5184 
5185   op_cost(0);
5186   format %{ %}
5187   interface(REG_INTER);
5188 %}
5189 
5190 operand vecX()
5191 %{
5192   constraint(ALLOC_IN_RC(vectorx_reg));
5193   match(VecX);
5194 
5195   op_cost(0);
5196   format %{ %}
5197   interface(REG_INTER);
5198 %}
5199 
5200 operand vRegD_V0()
5201 %{
5202   constraint(ALLOC_IN_RC(v0_reg));
5203   match(RegD);
5204   op_cost(0);
5205   format %{ %}
5206   interface(REG_INTER);
5207 %}
5208 
5209 operand vRegD_V1()
5210 %{
5211   constraint(ALLOC_IN_RC(v1_reg));
5212   match(RegD);
5213   op_cost(0);
5214   format %{ %}
5215   interface(REG_INTER);
5216 %}
5217 
5218 operand vRegD_V2()
5219 %{


5752     less_equal(0x9, "ls");
5753     greater(0x8, "hi");
5754     overflow(0x6, "vs");
5755     no_overflow(0x7, "vc");
5756   %}
5757 %}
5758 
5759 // Special operand allowing long args to int ops to be truncated for free
5760 
5761 operand iRegL2I(iRegL reg) %{
5762 
5763   op_cost(0);
5764 
5765   match(ConvL2I reg);
5766 
5767   format %{ "l2i($reg)" %}
5768 
5769   interface(REG_INTER)
5770 %}
5771 
5772 opclass vmem(indirect, indIndex, indOffI, indOffL);
5773 
5774 //----------OPERAND CLASSES----------------------------------------------------
5775 // Operand Classes are groups of operands that are used to simplify
5776 // instruction definitions by not requiring the AD writer to specify
5777 // separate instructions for every form of operand when the
5778 // instruction accepts multiple operand types with the same basic
5779 // encoding and format. The classic case of this is memory operands.
5780 
5781 // memory is used to define the read/write location for load/store
5782 // instruction defs.  We can turn a memory op into an Address.
5783 
5784 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5785                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5786 
5787 
5788 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5789 // operations. it allows the src to be either an iRegI or a (ConvL2I
5790 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5791 // can be elided because the 32-bit instruction will just employ the
5792 // lower 32 bits anyway.
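//
// For example (a sketch only -- addI here stands in for the real
// 32-bit add rules defined elsewhere in this file):
//
//   instruct addI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
//     match(Set dst (AddI src1 src2));
//     ...
//   %}
//
// matches both (AddI x y) and (AddI (ConvL2I xl) y), with no separate
// l2i emitted for the conversion.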


13174 
13175 // ============================================================================
13176 // This name is KNOWN by the ADLC and cannot be changed.
13177 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13178 // for this guy.
13179 instruct tlsLoadP(thread_RegP dst)
13180 %{
13181   match(Set dst (ThreadLocal));
13182 
13183   ins_cost(0);
13184 
13185   format %{ " -- \t// $dst=Thread::current(), empty" %}
13186 
13187   size(0);
13188 
13189   ins_encode( /*empty*/ );
13190 
13191   ins_pipe(pipe_class_empty);
13192 %}
13193 
13194 // ====================VECTOR INSTRUCTIONS=====================================
13195 
13196 // Load vector (32 bits)
13197 instruct loadV4(vecX dst, vmem mem)
13198 %{
13199   predicate(n->as_LoadVector()->memory_size() == 4);
13200   match(Set dst (LoadVector mem));
13201   ins_cost(4 * INSN_COST);
13202   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
13203   ins_encode( aarch64_enc_ldrvS(dst, mem) );
13204   ins_pipe(pipe_class_memory);
13205 %}
13206 
13207 // Load vector (64 bits)
13208 instruct loadV8(vecX dst, vmem mem)
13209 %{
13210   predicate(n->as_LoadVector()->memory_size() == 8);
13211   match(Set dst (LoadVector mem));
13212   ins_cost(4 * INSN_COST);
13213   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
13214   ins_encode( aarch64_enc_ldrvD(dst, mem) );
13215   ins_pipe(pipe_class_memory);
13216 %}
13217 
13218 // Load Vector (128 bits)
13219 instruct loadV16(vecX dst, vmem mem)
13220 %{
13221   predicate(n->as_LoadVector()->memory_size() == 16);
13222   match(Set dst (LoadVector mem));
13223   ins_cost(4 * INSN_COST);
13224   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
13225   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
13226   ins_pipe(pipe_class_memory);
13227 %}
13228 
13229 // Store Vector (32 bits)
13230 instruct storeV4(vecX src, vmem mem)
13231 %{
13232   predicate(n->as_StoreVector()->memory_size() == 4);
13233   match(Set mem (StoreVector mem src));
13234   ins_cost(4 * INSN_COST);
13235   format %{ "strs   $src,$mem\t# vector (32 bits)" %}
13236   ins_encode( aarch64_enc_strvS(src, mem) );
13237   ins_pipe(pipe_class_memory);
13238 %}
13239 
13240 // Store Vector (64 bits)
13241 instruct storeV8(vecX src, vmem mem)
13242 %{
13243   predicate(n->as_StoreVector()->memory_size() == 8);
13244   match(Set mem (StoreVector mem src));
13245   ins_cost(4 * INSN_COST);
13246   format %{ "strd   $src,$mem\t# vector (64 bits)" %}
13247   ins_encode( aarch64_enc_strvD(src, mem) );
13248   ins_pipe(pipe_class_memory);
13249 %}
13250 
13251 // Store Vector (128 bits)
13252 instruct storeV16(vecX src, vmem mem)
13253 %{
13254   predicate(n->as_StoreVector()->memory_size() == 16);
13255   match(Set mem (StoreVector mem src));
13256   ins_cost(4 * INSN_COST);
13257   format %{ "strq   $src,$mem\t# vector (128 bits)" %}
13258   ins_encode( aarch64_enc_strvQ(src, mem) );
13259   ins_pipe(pipe_class_memory);
13260 %}
13261 
13262 instruct replicate16B(vecX dst, iRegIorL2I src)
13263 %{
13264   match(Set dst (ReplicateB src));
13265   ins_cost(INSN_COST);
13266   format %{ "dup  $dst, $src\t# vector (16B)" %}
13267   ins_encode %{
13268     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
13269   %}
13270   ins_pipe(pipe_class_default);
13271 %}
13272 
13273 instruct replicate16B_imm(vecX dst, immI con)
13274 %{
13275   match(Set dst (ReplicateB con));
13276   ins_cost(INSN_COST);
13277   format %{ "movi  $dst, $con\t# vector (16B)" %}
13278   ins_encode %{
13279     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
13280   %}
13281   ins_pipe(pipe_class_default);
13282 %}
13283 
13284 instruct replicate8S(vecX dst, iRegIorL2I src)
13285 %{
13286   match(Set dst (ReplicateS src));
13287   ins_cost(INSN_COST);
13288   format %{ "dup  $dst, $src\t# vector (8S)" %}
13289   ins_encode %{
13290     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
13291   %}
13292   ins_pipe(pipe_class_default);
13293 %}
13294 
13295 instruct replicate8S_imm(vecX dst, immI con)
13296 %{
13297   match(Set dst (ReplicateS con));
13298   ins_cost(INSN_COST);
13299   format %{ "movi  $dst, $con\t# vector (8H)" %}
13300   ins_encode %{
13301     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
13302   %}
13303   ins_pipe(pipe_class_default);
13304 %}
13305 
13306 instruct replicate4I(vecX dst, iRegIorL2I src)
13307 %{
13308   match(Set dst (ReplicateI src));
13309   ins_cost(INSN_COST);
13310   format %{ "dup  $dst, $src\t# vector (4I)" %}
13311   ins_encode %{
13312     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
13313   %}
13314   ins_pipe(pipe_class_default);
13315 %}
13316 
13317 instruct replicate4I_imm(vecX dst, immI con)
13318 %{
13319   match(Set dst (ReplicateI con));
13320   ins_cost(INSN_COST);
13321   format %{ "movi  $dst, $con\t# vector (4I)" %}
13322   ins_encode %{
13323     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
13324   %}
13325   ins_pipe(pipe_class_default);
13326 %}
13327 
13328 instruct replicate2L(vecX dst, iRegL src)
13329 %{
13330   match(Set dst (ReplicateL src));
13331   ins_cost(INSN_COST);
13332   format %{ "dup  $dst, $src\t# vector (2L)" %}
13333   ins_encode %{
13334     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
13335   %}
13336   ins_pipe(pipe_class_default);
13337 %}
13338 
13339 instruct replicate2L_zero(vecX dst, immI0 zero)
13340 %{
13341   match(Set dst (ReplicateI zero));
13342   ins_cost(INSN_COST);
13343   format %{ "eor   $dst, $dst, $dst\t# vector (4I) zero" %}
13344   ins_encode %{
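    // eor of a register with itself zeroes all 128 bits, which covers
    // both the 4I and the 2L zero vector.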
13345     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13346            as_FloatRegister($dst$$reg),
13347            as_FloatRegister($dst$$reg));
13348   %}
13349   ins_pipe(pipe_class_default);
13350 %}
13351 
13352 instruct replicate4F(vecX dst, vRegF src)
13353 %{
13354   match(Set dst (ReplicateF src));
13355   ins_cost(INSN_COST);
13356   format %{ "dup  $dst, $src\t# vector (4F)" %}
13357   ins_encode %{
13358     __ dup(as_FloatRegister($dst$$reg), __ T4S,
13359            as_FloatRegister($src$$reg));
13360   %}
13361   ins_pipe(pipe_class_default);
13362 %}
13363 
13364 instruct replicate2D(vecX dst, vRegD src)
13365 %{
13366   match(Set dst (ReplicateD src));
13367   ins_cost(INSN_COST);
13368   format %{ "dup  $dst, $src\t# vector (2D)" %}
13369   ins_encode %{
13370     __ dup(as_FloatRegister($dst$$reg), __ T2D,
13371            as_FloatRegister($src$$reg));
13372   %}
13373   ins_pipe(pipe_class_default);
13374 %}
13375 
13376 // ====================REDUCTION ARITHMETIC====================================
13377 
13378 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13379 %{
13380   match(Set dst (AddReductionVI src1 src2));
13381   ins_cost(INSN_COST);
13382   effect(TEMP tmp, TEMP tmp2);
13383   format %{ "addv  $tmp, T4S, $src2\n\t"
13384             "umov  $tmp2, $tmp, S, 0\n\t"
13385             "addw  $dst, $tmp2, $src1\t# add reduction4i"
13386   %}
13387   ins_encode %{
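    // addv folds all four S lanes of src2 into lane 0 of tmp; umov
    // moves that lane to a general register, and addw folds in the
    // scalar input src1.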
13388     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
13389             as_FloatRegister($src2$$reg));
13390     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13391     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
13392   %}
13393   ins_pipe(pipe_class_default);
13394 %}
13395 
13396 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13397 %{
13398   match(Set dst (MulReductionVI src1 src2));
13399   ins_cost(INSN_COST);
13400   effect(TEMP tmp, TEMP tmp2, TEMP dst);
13401   format %{ "ins   $tmp, $src2, 0, 1\n\t"
13402             "mul   $tmp, $tmp, $src2\n\t"
13403             "umov  $tmp2, $tmp, S, 0\n\t"
13404             "mul   $dst, $tmp2, $src1\n\t"
13405             "umov  $tmp2, $tmp, S, 1\n\t"
13406             "mul   $dst, $tmp2, $dst\t# mul reduction4i"
13407   %}
13408   ins_encode %{
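    // Pairwise reduction: copy the high 64 bits of src2 into the low
    // half of tmp, multiply the two S-lane pairs, then extract both
    // product lanes and fold them into dst together with src1.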
13409     __ ins(as_FloatRegister($tmp$$reg), __ D,
13410            as_FloatRegister($src2$$reg), 0, 1);
13411     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
13412            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
13413     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13414     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
13415     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
13416     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
13417   %}
13418   ins_pipe(pipe_class_default);
13419 %}
13420 
13421 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13422 %{
13423   match(Set dst (AddReductionVF src1 src2));
13424   ins_cost(INSN_COST);
13425   effect(TEMP tmp, TEMP dst);
13426   format %{ "fadds $dst, $src1, $src2\n\t"
13427             "ins   $tmp, S, $src2, 0, 1\n\t"
13428             "fadds $dst, $dst, $tmp\n\t"
13429             "ins   $tmp, S, $src2, 0, 2\n\t"
13430             "fadds $dst, $dst, $tmp\n\t"
13431             "ins   $tmp, S, $src2, 0, 3\n\t"
13432             "fadds $dst, $dst, $tmp\t# add reduction4f"
13433   %}
13434   ins_encode %{
13435     __ fadds(as_FloatRegister($dst$$reg),
13436              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13437     __ ins(as_FloatRegister($tmp$$reg), __ S,
13438            as_FloatRegister($src2$$reg), 0, 1);
13439     __ fadds(as_FloatRegister($dst$$reg),
13440              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13441     __ ins(as_FloatRegister($tmp$$reg), __ S,
13442            as_FloatRegister($src2$$reg), 0, 2);
13443     __ fadds(as_FloatRegister($dst$$reg),
13444              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13445     __ ins(as_FloatRegister($tmp$$reg), __ S,
13446            as_FloatRegister($src2$$reg), 0, 3);
13447     __ fadds(as_FloatRegister($dst$$reg),
13448              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13449   %}
13450   ins_pipe(pipe_class_default);
13451 %}
13452 
13453 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13454 %{
13455   match(Set dst (MulReductionVF src1 src2));
13456   ins_cost(INSN_COST);
13457   effect(TEMP tmp, TEMP dst);
13458   format %{ "fmuls $dst, $src1, $src2\n\t"
13459             "ins   $tmp, S, $src2, 0, 1\n\t"
13460             "fmuls $dst, $dst, $tmp\n\t"
13461             "ins   $tmp, S, $src2, 0, 2\n\t"
13462             "fmuls $dst, $dst, $tmp\n\t"
13463             "ins   $tmp, S, $src2, 0, 3\n\t"
13464             "fmuls $dst, $dst, $tmp\t# mul reduction4f"
13465   %}
13466   ins_encode %{
13467     __ fmuls(as_FloatRegister($dst$$reg),
13468              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13469     __ ins(as_FloatRegister($tmp$$reg), __ S,
13470            as_FloatRegister($src2$$reg), 0, 1);
13471     __ fmuls(as_FloatRegister($dst$$reg),
13472              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13473     __ ins(as_FloatRegister($tmp$$reg), __ S,
13474            as_FloatRegister($src2$$reg), 0, 2);
13475     __ fmuls(as_FloatRegister($dst$$reg),
13476              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13477     __ ins(as_FloatRegister($tmp$$reg), __ S,
13478            as_FloatRegister($src2$$reg), 0, 3);
13479     __ fmuls(as_FloatRegister($dst$$reg),
13480              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13481   %}
13482   ins_pipe(pipe_class_default);
13483 %}
13484 
13485 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13486 %{
13487   match(Set dst (AddReductionVD src1 src2));
13488   ins_cost(INSN_COST);
13489   effect(TEMP tmp, TEMP dst);
13490   format %{ "faddd $dst, $src1, $src2\n\t"
13491             "ins   $tmp, D, $src2, 0, 1\n\t"
13492             "faddd $dst, $dst, $tmp\t# add reduction2d"
13493   %}
13494   ins_encode %{
13495     __ faddd(as_FloatRegister($dst$$reg),
13496              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13497     __ ins(as_FloatRegister($tmp$$reg), __ D,
13498            as_FloatRegister($src2$$reg), 0, 1);
13499     __ faddd(as_FloatRegister($dst$$reg),
13500              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13501   %}
13502   ins_pipe(pipe_class_default);
13503 %}
13504 
13505 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13506 %{
13507   match(Set dst (MulReductionVD src1 src2));
13508   ins_cost(INSN_COST);
13509   effect(TEMP tmp, TEMP dst);
13510   format %{ "fmuld $dst, $src1, $src2\n\t"
13511             "ins   $tmp, D, $src2, 0, 1\n\t"
13512             "fmuld $dst, $dst, $tmp\t# mul reduction2d"
13513   %}
13514   ins_encode %{
13515     __ fmuld(as_FloatRegister($dst$$reg),
13516              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13517     __ ins(as_FloatRegister($tmp$$reg), __ D,
13518            as_FloatRegister($src2$$reg), 0, 1);
13519     __ fmuld(as_FloatRegister($dst$$reg),
13520              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13521   %}
13522   ins_pipe(pipe_class_default);
13523 %}
13524 
13525 // ====================VECTOR ARITHMETIC=======================================
13526 
13527 // --------------------------------- ADD --------------------------------------
13528 
13529 instruct vadd16B(vecX dst, vecX src1, vecX src2)
13530 %{
13531   match(Set dst (AddVB src1 src2));
13532   ins_cost(INSN_COST);
13533   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
13534   ins_encode %{
13535     __ addv(as_FloatRegister($dst$$reg), __ T16B,
13536             as_FloatRegister($src1$$reg),
13537             as_FloatRegister($src2$$reg));
13538   %}
13539   ins_pipe(pipe_class_default);
13540 %}
13541 
13542 instruct vadd8S(vecX dst, vecX src1, vecX src2)
13543 %{
13544   match(Set dst (AddVS src1 src2));
13545   ins_cost(INSN_COST);
13546   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
13547   ins_encode %{
13548     __ addv(as_FloatRegister($dst$$reg), __ T8H,
13549             as_FloatRegister($src1$$reg),
13550             as_FloatRegister($src2$$reg));
13551   %}
13552   ins_pipe(pipe_class_default);
13553 %}
13554 
13555 instruct vadd4I(vecX dst, vecX src1, vecX src2)
13556 %{
13557   match(Set dst (AddVI src1 src2));
13558   ins_cost(INSN_COST);
13559   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
13560   ins_encode %{
13561     __ addv(as_FloatRegister($dst$$reg), __ T4S,
13562             as_FloatRegister($src1$$reg),
13563             as_FloatRegister($src2$$reg));
13564   %}
13565   ins_pipe(pipe_class_default);
13566 %}
13567 
13568 instruct vadd2L(vecX dst, vecX src1, vecX src2)
13569 %{
13570   match(Set dst (AddVL src1 src2));
13571   ins_cost(INSN_COST);
13572   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
13573   ins_encode %{
13574     __ addv(as_FloatRegister($dst$$reg), __ T2D,
13575             as_FloatRegister($src1$$reg),
13576             as_FloatRegister($src2$$reg));
13577   %}
13578   ins_pipe(pipe_class_default);
13579 %}
13580 
13581 instruct vadd4F(vecX dst, vecX src1, vecX src2)
13582 %{
13583   match(Set dst (AddVF src1 src2));
13584   ins_cost(INSN_COST);
13585   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
13586   ins_encode %{
13587     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
13588             as_FloatRegister($src1$$reg),
13589             as_FloatRegister($src2$$reg));
13590   %}
13591   ins_pipe(pipe_class_default);
13592 %}
13593 
13594 instruct vadd2D(vecX dst, vecX src1, vecX src2)
13595 %{
13596   match(Set dst (AddVD src1 src2));
13597   ins_cost(INSN_COST);
13598   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
13599   ins_encode %{
13600     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
13601             as_FloatRegister($src1$$reg),
13602             as_FloatRegister($src2$$reg));
13603   %}
13604   ins_pipe(pipe_class_default);
13605 %}
13606 
13607 // --------------------------------- SUB --------------------------------------
13608 
13609 instruct vsub16B(vecX dst, vecX src1, vecX src2)
13610 %{
13611   match(Set dst (SubVB src1 src2));
13612   ins_cost(INSN_COST);
13613   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
13614   ins_encode %{
13615     __ subv(as_FloatRegister($dst$$reg), __ T16B,
13616             as_FloatRegister($src1$$reg),
13617             as_FloatRegister($src2$$reg));
13618   %}
13619   ins_pipe(pipe_class_default);
13620 %}
13621 
13622 instruct vsub8S(vecX dst, vecX src1, vecX src2)
13623 %{
13624   match(Set dst (SubVS src1 src2));
13625   ins_cost(INSN_COST);
13626   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
13627   ins_encode %{
13628     __ subv(as_FloatRegister($dst$$reg), __ T8H,
13629             as_FloatRegister($src1$$reg),
13630             as_FloatRegister($src2$$reg));
13631   %}
13632   ins_pipe(pipe_class_default);
13633 %}
13634 
13635 instruct vsub4I(vecX dst, vecX src1, vecX src2)
13636 %{
13637   match(Set dst (SubVI src1 src2));
13638   ins_cost(INSN_COST);
13639   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
13640   ins_encode %{
13641     __ subv(as_FloatRegister($dst$$reg), __ T4S,
13642             as_FloatRegister($src1$$reg),
13643             as_FloatRegister($src2$$reg));
13644   %}
13645   ins_pipe(pipe_class_default);
13646 %}
13647 
13648 instruct vsub2L(vecX dst, vecX src1, vecX src2)
13649 %{
13650   match(Set dst (SubVL src1 src2));
13651   ins_cost(INSN_COST);
13652   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
13653   ins_encode %{
13654     __ subv(as_FloatRegister($dst$$reg), __ T2D,
13655             as_FloatRegister($src1$$reg),
13656             as_FloatRegister($src2$$reg));
13657   %}
13658   ins_pipe(pipe_class_default);
13659 %}
13660 
13661 instruct vsub4F(vecX dst, vecX src1, vecX src2)
13662 %{
13663   match(Set dst (SubVF src1 src2));
13664   ins_cost(INSN_COST);
13665   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
13666   ins_encode %{
13667     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
13668             as_FloatRegister($src1$$reg),
13669             as_FloatRegister($src2$$reg));
13670   %}
13671   ins_pipe(pipe_class_default);
13672 %}
13673 
13674 instruct vsub2D(vecX dst, vecX src1, vecX src2)
13675 %{
13676   match(Set dst (SubVD src1 src2));
13677   ins_cost(INSN_COST);
13678   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
13679   ins_encode %{
13680     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
13681             as_FloatRegister($src1$$reg),
13682             as_FloatRegister($src2$$reg));
13683   %}
13684   ins_pipe(pipe_class_default);
13685 %}
13686 
13687 // --------------------------------- MUL --------------------------------------
13688 
13689 instruct vmul8S(vecX dst, vecX src1, vecX src2)
13690 %{
13691   match(Set dst (MulVS src1 src2));
13692   ins_cost(INSN_COST);
13693   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
13694   ins_encode %{
13695     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
13696             as_FloatRegister($src1$$reg),
13697             as_FloatRegister($src2$$reg));
13698   %}
13699   ins_pipe(pipe_class_default);
13700 %}
13701 
13702 instruct vmul4I(vecX dst, vecX src1, vecX src2)
13703 %{
13704   match(Set dst (MulVI src1 src2));
13705   ins_cost(INSN_COST);
13706   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
13707   ins_encode %{
13708     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
13709             as_FloatRegister($src1$$reg),
13710             as_FloatRegister($src2$$reg));
13711   %}
13712   ins_pipe(pipe_class_default);
13713 %}
13714 
13715 instruct vmul4F(vecX dst, vecX src1, vecX src2)
13716 %{
13717   match(Set dst (MulVF src1 src2));
13718   ins_cost(INSN_COST);
13719   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
13720   ins_encode %{
13721     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
13722             as_FloatRegister($src1$$reg),
13723             as_FloatRegister($src2$$reg));
13724   %}
13725   ins_pipe(pipe_class_default);
13726 %}
13727 
13728 instruct vmul2D(vecX dst, vecX src1, vecX src2)
13729 %{
13730   match(Set dst (MulVD src1 src2));
13731   ins_cost(INSN_COST);
13732   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
13733   ins_encode %{
13734     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
13735             as_FloatRegister($src1$$reg),
13736             as_FloatRegister($src2$$reg));
13737   %}
13738   ins_pipe(pipe_class_default);
13739 %}
13740 
13741 // --------------------------------- DIV --------------------------------------
13742 
13743 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
13744 %{
13745   match(Set dst (DivVF src1 src2));
13746   ins_cost(INSN_COST);
13747   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
13748   ins_encode %{
13749     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
13750             as_FloatRegister($src1$$reg),
13751             as_FloatRegister($src2$$reg));
13752   %}
13753   ins_pipe(pipe_class_default);
13754 %}
13755 
13756 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
13757 %{
13758   match(Set dst (DivVD src1 src2));
13759   ins_cost(INSN_COST);
13760   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
13761   ins_encode %{
13762     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
13763             as_FloatRegister($src1$$reg),
13764             as_FloatRegister($src2$$reg));
13765   %}
13766   ins_pipe(pipe_class_default);
13767 %}
13768 
13769 // --------------------------------- AND --------------------------------------
13770 
13771 instruct vand16B(vecX dst, vecX src1, vecX src2)
13772 %{
13773   match(Set dst (AndV src1 src2));
13774   ins_cost(INSN_COST);
13775   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
13776   ins_encode %{
13777     __ andr(as_FloatRegister($dst$$reg), __ T16B,
13778             as_FloatRegister($src1$$reg),
13779             as_FloatRegister($src2$$reg));
13780   %}
13781   ins_pipe(pipe_class_default);
13782 %}
13783 
13784 // --------------------------------- OR ---------------------------------------
13785 
13786 instruct vor16B(vecX dst, vecX src1, vecX src2)
13787 %{
13788   match(Set dst (OrV src1 src2));
13789   ins_cost(INSN_COST);
13790   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
13791   ins_encode %{
13792     __ orr(as_FloatRegister($dst$$reg), __ T16B,
13793             as_FloatRegister($src1$$reg),
13794             as_FloatRegister($src2$$reg));
13795   %}
13796   ins_pipe(pipe_class_default);
13797 %}
13798 
13799 // --------------------------------- XOR --------------------------------------
13800 
13801 instruct vxor16B(vecX dst, vecX src1, vecX src2)
13802 %{
13803   match(Set dst (XorV src1 src2));
13804   ins_cost(INSN_COST);
13805   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
13806   ins_encode %{
13807     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13808             as_FloatRegister($src1$$reg),
13809             as_FloatRegister($src2$$reg));
13810   %}
13811   ins_pipe(pipe_class_default);
13812 %}
13813 
13814 // ------------------------------ Shift ---------------------------------------
13815 
13816 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
13817   match(Set dst (LShiftCntV cnt));
13818   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
13819   ins_encode %{
13820     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13821   %}
13822   ins_pipe(pipe_class_default);
13823 %}
13824 
13825 // Right shifts on aarch64 SIMD are implemented as a left shift by a negative amount
13826 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
13827   match(Set dst (RShiftCntV cnt));
13828   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t# vector (16B)" %}
13829   ins_encode %{
13830     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13831     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
13832   %}
13833   ins_pipe(pipe_class_default);
13834 %}
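
// NB: NEON sshl/ushl shift left for a positive count and right for a
// negative one, so the negated count produced by vshiftcntR lets the
// variable-shift rules below serve both directions.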
13835 
13836 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
13837   match(Set dst (LShiftVB src shift));
13838   match(Set dst (RShiftVB src shift));
13839   ins_cost(INSN_COST);
13840   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
13841   ins_encode %{
13842     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
13843             as_FloatRegister($src$$reg),
13844             as_FloatRegister($shift$$reg));
13845   %}
13846   ins_pipe(pipe_class_default);
13847 %}
13848 
13849 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
13850   match(Set dst (URShiftVB src shift));
13851   ins_cost(INSN_COST);
13852   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
13853   ins_encode %{
13854     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
13855             as_FloatRegister($src$$reg),
13856             as_FloatRegister($shift$$reg));
13857   %}
13858   ins_pipe(pipe_class_default);
13859 %}
13860 
13861 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
13862   match(Set dst (LShiftVB src shift));
13863   ins_cost(INSN_COST);
13864   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
13865   ins_encode %{
13866     int sh = (int)$shift$$constant & 31;
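    // A byte lane shifted left by 8 or more is zero, so in that case
    // emit an eor of the register with itself instead of a shl.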
13867     if (sh >= 8) {
13868       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13869              as_FloatRegister($src$$reg),
13870              as_FloatRegister($src$$reg));
13871     } else {
13872       __ shl(as_FloatRegister($dst$$reg), __ T16B,
13873              as_FloatRegister($src$$reg), sh);
13874     }
13875   %}
13876   ins_pipe(pipe_class_default);
13877 %}
13878 
13879 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
13880   match(Set dst (RShiftVB src shift));
13881   ins_cost(INSN_COST);
13882   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
13883   ins_encode %{
13884     int sh = (int)$shift$$constant & 31;
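    // An arithmetic right shift by 8 or more just replicates the sign
    // bit, i.e. it equals a shift by 7, so clamp there; the masked
    // negation then produces the immediate encoding sshr expects here.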
13885     if (sh >= 8) sh = 7;
13886     sh = -sh & 7;
13887     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
13888            as_FloatRegister($src$$reg), sh);
13889   %}
13890   ins_pipe(pipe_class_default);
13891 %}
13892 
13893 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
13894   match(Set dst (URShiftVB src shift));
13895   ins_cost(INSN_COST);
13896   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
13897   ins_encode %{
13898     int sh = (int)$shift$$constant & 31;
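    // A logical right shift by 8 or more yields zero, hence the eor;
    // otherwise the masked negation produces the immediate ushr expects.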
13899     if (sh >= 8) {
13900       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13901              as_FloatRegister($src$$reg),
13902              as_FloatRegister($src$$reg));
13903     } else {
13904       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
13905              as_FloatRegister($src$$reg), -sh & 7);
13906     }
13907   %}
13908   ins_pipe(pipe_class_default);
13909 %}
13910 
13911 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
13912   match(Set dst (LShiftVS src shift));
13913   match(Set dst (RShiftVS src shift));
13914   ins_cost(INSN_COST);
13915   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
13916   ins_encode %{
13917     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
13918             as_FloatRegister($src$$reg),
13919             as_FloatRegister($shift$$reg));
13920   %}
13921   ins_pipe(pipe_class_default);
13922 %}
13923 
13924 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
13925   match(Set dst (URShiftVS src shift));
13926   ins_cost(INSN_COST);
13927   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
13928   ins_encode %{
13929     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
13930             as_FloatRegister($src$$reg),
13931             as_FloatRegister($shift$$reg));
13932   %}
13933   ins_pipe(pipe_class_default);
13934 %}
13935 
13936 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
13937   match(Set dst (LShiftVS src shift));
13938   ins_cost(INSN_COST);
13939   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
13940   ins_encode %{
13941     int sh = (int)$shift$$constant & 31;
13942     if (sh >= 16) {
13943       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13944              as_FloatRegister($src$$reg),
13945              as_FloatRegister($src$$reg));
13946     } else {
13947       __ shl(as_FloatRegister($dst$$reg), __ T8H,
13948              as_FloatRegister($src$$reg), sh);
13949     }
13950   %}
13951   ins_pipe(pipe_class_default);
13952 %}
13953 
13954 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
13955   match(Set dst (RShiftVS src shift));
13956   ins_cost(INSN_COST);
13957   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
13958   ins_encode %{
13959     int sh = (int)$shift$$constant & 31;
13960     if (sh >= 16) sh = 15;
13961     sh = -sh & 15;
13962     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
13963            as_FloatRegister($src$$reg), sh);
13964   %}
13965   ins_pipe(pipe_class_default);
13966 %}
13967 
13968 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
13969   match(Set dst (URShiftVS src shift));
13970   ins_cost(INSN_COST);
13971   format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
13972   ins_encode %{
13973     int sh = (int)$shift$$constant & 31;
13974     if (sh >= 16) {
13975       __ eor(as_FloatRegister($dst$$reg), __ T16B,
13976              as_FloatRegister($src$$reg),
13977              as_FloatRegister($src$$reg));
13978     } else {
13979       __ ushr(as_FloatRegister($dst$$reg), __ T8H,
13980              as_FloatRegister($src$$reg), -sh & 15);
13981     }
13982   %}
13983   ins_pipe(pipe_class_default);
13984 %}
13985 
13986 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
13987   match(Set dst (LShiftVI src shift));
13988   match(Set dst (RShiftVI src shift));
13989   ins_cost(INSN_COST);
13990   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
13991   ins_encode %{
13992     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
13993             as_FloatRegister($src$$reg),
13994             as_FloatRegister($shift$$reg));
13995   %}
13996   ins_pipe(pipe_class_default);
13997 %}
13998 
13999 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
14000   match(Set dst (URShiftVI src shift));
14001   ins_cost(INSN_COST);
14002   format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
14003   ins_encode %{
14004     __ ushl(as_FloatRegister($dst$$reg), __ T4S,
14005             as_FloatRegister($src$$reg),
14006             as_FloatRegister($shift$$reg));
14007   %}
14008   ins_pipe(pipe_class_default);
14009 %}
14010 
14011 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
14012   match(Set dst (LShiftVI src shift));
14013   ins_cost(INSN_COST);
14014   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
14015   ins_encode %{
14016     __ shl(as_FloatRegister($dst$$reg), __ T4S,
14017            as_FloatRegister($src$$reg),
14018            (int)$shift$$constant & 31);
14019   %}
14020   ins_pipe(pipe_class_default);
14021 %}
14022 
14023 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
14024   match(Set dst (RShiftVI src shift));
14025   ins_cost(INSN_COST);
14026   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
14027   ins_encode %{
14028     __ sshr(as_FloatRegister($dst$$reg), __ T4S,
14029             as_FloatRegister($src$$reg),
14030             -(int)$shift$$constant & 31);
14031   %}
14032   ins_pipe(pipe_class_default);
14033 %}
14034 
14035 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
14036   match(Set dst (URShiftVI src shift));
14037   ins_cost(INSN_COST);
14038   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
14039   ins_encode %{
14040     __ ushr(as_FloatRegister($dst$$reg), __ T4S,
14041             as_FloatRegister($src$$reg),
14042             -(int)$shift$$constant & 31);
14043   %}
14044   ins_pipe(pipe_class_default);
14045 %}
14046 
14047 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
14048   match(Set dst (LShiftVL src shift));
14049   match(Set dst (RShiftVL src shift));
14050   ins_cost(INSN_COST);
14051   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
14052   ins_encode %{
14053     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
14054             as_FloatRegister($src$$reg),
14055             as_FloatRegister($shift$$reg));
14056   %}
14057   ins_pipe(pipe_class_default);
14058 %}
14059 
14060 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
14061   match(Set dst (URShiftVL src shift));
14062   ins_cost(INSN_COST);
14063   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
14064   ins_encode %{
14065     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
14066             as_FloatRegister($src$$reg),
14067             as_FloatRegister($shift$$reg));
14068   %}
14069   ins_pipe(pipe_class_default);
14070 %}
14071 
14072 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
14073   match(Set dst (LShiftVL src shift));
14074   ins_cost(INSN_COST);
14075   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
14076   ins_encode %{
14077     __ shl(as_FloatRegister($dst$$reg), __ T2D,
14078            as_FloatRegister($src$$reg),
14079            (int)$shift$$constant & 63);
14080   %}
14081   ins_pipe(pipe_class_default);
14082 %}
14083 
14084 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
14085   match(Set dst (RShiftVL src shift));
14086   ins_cost(INSN_COST);
14087   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
14088   ins_encode %{
14089     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
14090             as_FloatRegister($src$$reg),
14091             -(int)$shift$$constant & 63);
14092   %}
14093   ins_pipe(pipe_class_default);
14094 %}
14095 
14096 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
14097   match(Set dst (URShiftVL src shift));
14098   ins_cost(INSN_COST);
14099   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
14100   ins_encode %{
14101     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
14102             as_FloatRegister($src$$reg),
14103             -(int)$shift$$constant & 63);
14104   %}
14105   ins_pipe(pipe_class_default);
14106 %}
14107 
14108 //----------PEEPHOLE RULES-----------------------------------------------------
14109 // These must follow all instruction definitions as they use the names
14110 // defined in the instructions definitions.
14111 //
14112 // peepmatch ( root_instr_name [preceding_instruction]* );
14113 //
14114 // peepconstraint %{
14115 // (instruction_number.operand_name relational_op instruction_number.operand_name
14116 //  [, ...] );
14117 // // instruction numbers are zero-based using left to right order in peepmatch
14118 //
14119 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14120 // // provide an instruction_number.operand_name for each operand that appears
14121 // // in the replacement instruction's match rule
14122 //
14123 // ---------VM FLAGS---------------------------------------------------------
14124 //
14125 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14126 //
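//
// For illustration only (a sketch modeled on the example in the x86
// ad files, not a peephole defined here), a rule that replaces a load
// from a just-stored location with the store alone might look like:
//
// peephole %{
//   peepmatch ( loadI storeI );
//   peepconstraint ( 1.src == 0.mem );
//   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
// %}
//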

