146 // Double Registers
147
148 // The rules of ADL require that double registers be defined in pairs.
149 // Each pair must be two 32-bit values, but not necessarily a pair of
150 // single float registers. In each pair, ADLC-assigned register numbers
151 // must be adjacent, with the lower number even. Finally, when the
152 // CPU stores such a register pair to memory, the word associated with
153 // the lower ADLC-assigned number must be stored to the lower address.
154
155 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
156 // can store a vector of floating-point values: 4 * 32 bit floats or
157 // 2 * 64 bit doubles. We currently only use the first float or
158 // double element of the vector.
159
160 // Float registers v0-v7 and v16-v31 are save-on-call (SOC) per the
161 // platform spec. The platform ABI treats v8-v15 as callee save, so
162 // they are marked save-on-entry (SOE) below.
163
164 reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
165 reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
166 reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
167 reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
168 reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
169 reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
170 reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
171 reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
172 reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
173 reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
174 reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
175 reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
176 reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
177 reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
178 reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
179 reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
180 reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() );
181 reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() );
182 reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() );
183 reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() );
184 reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() );
185 reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next());
186 reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() );
187 reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next());
188 reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() );
189 reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next());
190 reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() );
191 reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next());
192 reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() );
193 reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next());
194 reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() );
195 reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next());
196 reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
197 reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next());
198 reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
199 reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next());
200 reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
201 reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next());
202 reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
203 reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next());
204 reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
205 reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next());
206 reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
207 reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next());
208 reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
209 reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next());
210 reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
211 reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next());
212 reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
213 reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next());
214 reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
215 reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next());
216 reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
217 reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next());
218 reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
219 reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next());
220 reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
221 reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next());
222 reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
223 reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next());
224 reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
225 reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next());
226 reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
227 reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next());
228
229 // ----------------------------
230 // Special Registers
231 // ----------------------------
232
233 // The AArch64 CPSR status flag register is not directly accessible as
234 // an instruction operand. The FPSR status flag register is a system
235 // register which can be written/read using MSR/MRS but again does not
236 // appear as an operand (a code identifying the FPSR occurs as an
237 // immediate value in the instruction).
238
239 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
240
241
242 // Specify priority of register selection within phases of register
243 // allocation. Highest priority is first. A useful heuristic is to
244 // give registers a low priority when they are required by machine
245 // instructions, like EAX and EDX on I486, and choose no-save registers
246 // before save-on-call, & save-on-call before save-on-entry. Registers
247 // which participate in fixed calling sequences should come last.
274 R20, R20_H,
275 R21, R21_H,
276 R22, R22_H,
277 R23, R23_H,
278 R24, R24_H,
279 R25, R25_H,
280 R26, R26_H,
281
282 // non-allocatable registers
283
284 R27, R27_H, // heapbase
285 R28, R28_H, // thread
286 R29, R29_H, // fp
287 R30, R30_H, // lr
288 R31, R31_H, // sp
289 );
290
291 alloc_class chunk1(
292
293 // no save
294 V16, V16_H,
295 V17, V17_H,
296 V18, V18_H,
297 V19, V19_H,
298 V20, V20_H,
299 V21, V21_H,
300 V22, V22_H,
301 V23, V23_H,
302 V24, V24_H,
303 V25, V25_H,
304 V26, V26_H,
305 V27, V27_H,
306 V28, V28_H,
307 V29, V29_H,
308 V30, V30_H,
309 V31, V31_H,
310
311 // arg registers
312 V0, V0_H,
313 V1, V1_H,
314 V2, V2_H,
315 V3, V3_H,
316 V4, V4_H,
317 V5, V5_H,
318 V6, V6_H,
319 V7, V7_H,
320
321 // non-volatiles
322 V8, V8_H,
323 V9, V9_H,
324 V10, V10_H,
325 V11, V11_H,
326 V12, V12_H,
327 V13, V13_H,
328 V14, V14_H,
329 V15, V15_H,
330 );
331
332 alloc_class chunk2(RFLAGS);
333
334 //----------Architecture Description Register Classes--------------------------
335 // Several register classes are automatically defined based upon information in
336 // this architecture description.
337 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
338 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
339 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
340 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
341 //
342
343 // Class for all 32 bit integer registers -- excludes SP which will
344 // never be used as an integer register
345 reg_class any_reg32(
346 R0,
347 R1,
348 R2,
349 R3,
753 V14, V14_H,
754 V15, V15_H,
755 V16, V16_H,
756 V17, V17_H,
757 V18, V18_H,
758 V19, V19_H,
759 V20, V20_H,
760 V21, V21_H,
761 V22, V22_H,
762 V23, V23_H,
763 V24, V24_H,
764 V25, V25_H,
765 V26, V26_H,
766 V27, V27_H,
767 V28, V28_H,
768 V29, V29_H,
769 V30, V30_H,
770 V31, V31_H
771 );
772
773 // Class for 128 bit register v0
774 reg_class v0_reg(
775 V0, V0_H
776 );
777
778 // Class for 128 bit register v1
779 reg_class v1_reg(
780 V1, V1_H
781 );
782
783 // Class for 128 bit register v2
784 reg_class v2_reg(
785 V2, V2_H
786 );
787
788 // Class for 128 bit register v3
789 reg_class v3_reg(
790 V3, V3_H
791 );
792
1947 //=============================================================================
1948
1949 // Figure out which register class each belongs in: rc_int, rc_float or
1950 // rc_stack.
1951 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1952
1953 static enum RC rc_class(OptoReg::Name reg) {
1954
1955 if (reg == OptoReg::Bad) {
1956 return rc_bad;
1957 }
1958
1959 // we have 30 int registers * 2 halves
1960 // (rscratch1 and rscratch2 are omitted)
1961
1962 if (reg < 60) {
1963 return rc_int;
1964 }
1965
1966 // we have 32 float register * 2 halves
1967 if (reg < 60 + 64) {
1968 return rc_float;
1969 }
1970
1971 // Between float regs & stack is the flags regs.
1972 assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1973
1974 return rc_stack;
1975 }
1976
1977 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1978 Compile* C = ra_->C;
1979
1980 // Get registers to move.
1981 OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1982 OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1983 OptoReg::Name dst_hi = ra_->get_reg_second(this);
1984 OptoReg::Name dst_lo = ra_->get_reg_first(this);
1985
1986 enum RC src_hi_rc = rc_class(src_hi);
1987 enum RC src_lo_rc = rc_class(src_lo);
1988 enum RC dst_hi_rc = rc_class(dst_hi);
1989 enum RC dst_lo_rc = rc_class(dst_lo);
1990
1991 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1992
1993 if (src_hi != OptoReg::Bad) {
1994 assert((src_lo&1)==0 && src_lo+1==src_hi &&
1995 (dst_lo&1)==0 && dst_lo+1==dst_hi,
1996 "expected aligned-adjacent pairs");
1997 }
1998
1999 if (src_lo == dst_lo && src_hi == dst_hi) {
2000 return 0; // Self copy, no move.
2001 }
2002
2003 switch (src_lo_rc) {
2004 case rc_int:
2005 if (dst_lo_rc == rc_int) { // gpr --> gpr copy
2006 if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2007 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2008 // 64 bit
2009 if (cbuf) {
2010 MacroAssembler _masm(cbuf);
2011 __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2012 as_Register(Matcher::_regEncode[src_lo]));
2013 } else if (st) {
2014 st->print("mov %s, %s\t# shuffle",
2015 Matcher::regName[dst_lo],
2016 Matcher::regName[src_lo]);
2017 }
2018 } else {
2019 // 32 bit
2020 if (cbuf) {
2021 MacroAssembler _masm(cbuf);
2022 __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2405
2406 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
2407 {
2408 Unimplemented();
2409 return false;
2410 }
2411
2412 const bool Matcher::isSimpleConstant64(jlong value) {
2413 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2414 // Probably always true, even if a temp register is required.
2415 return true;
2416 }
2417
2418 // true just means we have fast l2f conversion
2419 const bool Matcher::convL2FSupported(void) {
2420 return true;
2421 }
2422
2423 // Vector width in bytes.
2424 const int Matcher::vector_width_in_bytes(BasicType bt) {
2425 // TODO fixme
2426 return 0;
2427 }
2428
2429 // Limits on vector size (number of elements) loaded into vector.
2430 const int Matcher::max_vector_size(const BasicType bt) {
2431 return vector_width_in_bytes(bt)/type2aelembytes(bt);
2432 }
2433 const int Matcher::min_vector_size(const BasicType bt) {
2434 int max_size = max_vector_size(bt);
2435 // Min size which can be loaded into vector is 4 bytes.
2436 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
2437 return MIN2(size,max_size);
2438 }
2439
2440 // Vector ideal reg.
2441 const int Matcher::vector_ideal_reg(int len) {
2442 // TODO fixme
2443 return Op_RegD;
2444 }
2445
2446 // Only lowest bits of xmm reg are used for vector shift count.
2447 const int Matcher::vector_shift_count_ideal_reg(int size) {
2448 // TODO fixme
2449 return Op_RegL;
2450 }
2451
2452 // AES support not yet implemented
2453 const bool Matcher::pass_original_key_for_aes() {
2454 return false;
2455 }
2456
2457 // x86 supports misaligned vectors store/load.
2458 const bool Matcher::misaligned_vectors_ok() {
2459 // TODO fixme
2460 // return !AlignVector; // can be changed by flag
2461 return false;
2462 }
2463
2464 // false => size gets scaled to BytesPerLong, ok.
2465 const bool Matcher::init_array_count_is_in_bytes = false;
2466
2467 // Threshold size for cleararray.
2468 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2469
2640 case T_FLOAT:
2641 rtype = MacroAssembler::ret_type_float;
2642 break;
2643 case T_DOUBLE:
2644 rtype = MacroAssembler::ret_type_double;
2645 break;
2646 }
2647 }
2648
2649 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \
2650 MacroAssembler _masm(&cbuf); \
2651 { \
2652 guarantee(INDEX == -1, "mode not permitted for volatile"); \
2653 guarantee(DISP == 0, "mode not permitted for volatile"); \
2654 guarantee(SCALE == 0, "mode not permitted for volatile"); \
2655 __ INSN(REG, as_Register(BASE)); \
2656 }
2657
2658 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2659 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2660
2661 // Used for all non-volatile memory accesses. The use of
2662 // $mem->opcode() to discover whether this pattern uses sign-extended
2663 // offsets is something of a kludge.
2664 static void loadStore(MacroAssembler masm, mem_insn insn,
2665 Register reg, int opcode,
2666 Register base, int index, int size, int disp)
2667 {
2668 Address::extend scale;
2669
2670 // Hooboy, this is fugly. We need a way to communicate to the
2671 // encoder that the index needs to be sign extended, so we have to
2672 // enumerate all the cases.
2673 switch (opcode) {
2674 case INDINDEXSCALEDOFFSETI2L:
2675 case INDINDEXSCALEDI2L:
2676 case INDINDEXSCALEDOFFSETI2LN:
2677 case INDINDEXSCALEDI2LN:
2678 case INDINDEXOFFSETI2L:
2679 case INDINDEXOFFSETI2LN:
2707 case INDINDEXSCALEDOFFSETI2LN:
2708 case INDINDEXSCALEDI2LN:
2709 scale = Address::sxtw(size);
2710 break;
2711 default:
2712 scale = Address::lsl(size);
2713 }
2714
2715 if (index == -1) {
2716 (masm.*insn)(reg, Address(base, disp));
2717 } else {
2718 if (disp == 0) {
2719 (masm.*insn)(reg, Address(base, as_Register(index), scale));
2720 } else {
2721 masm.lea(rscratch1, Address(base, disp));
2722 (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2723 }
2724 }
2725 }
2726
2727 %}
2728
2729
2730
2731 //----------ENCODING BLOCK-----------------------------------------------------
2732 // This block specifies the encoding classes used by the compiler to
2733 // output byte streams. Encoding classes are parameterized macros
2734 // used by Machine Instruction Nodes in order to generate the bit
2735 // encoding of the instruction. Operands specify their base encoding
2736 // interface with the interface keyword. There are currently
2737 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2738 // COND_INTER. REG_INTER causes an operand to generate a function
2739 // which returns its register number when queried. CONST_INTER causes
2740 // an operand to generate a function which returns the value of the
2741 // constant when queried. MEMORY_INTER causes an operand to generate
2742 // four functions which return the Base Register, the Index Register,
2743 // the Scale Value, and the Offset Value of the operand when queried.
2744 // COND_INTER causes an operand to generate six functions which return
2745 // the encoding code (ie - encoding bits for the instruction)
2746 // associated with each basic boolean condition for a conditional
2838 %}
2839
2840 enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
2841 Register dst_reg = as_Register($dst$$reg);
2842 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
2843 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2844 %}
2845
2846 enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
2847 FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2848 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
2849 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2850 %}
2851
2852 enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
2853 FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2854 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
2855 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2856 %}
2857
2858 enc_class aarch64_enc_strb(iRegI src, memory mem) %{
2859 Register src_reg = as_Register($src$$reg);
2860 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
2861 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2862 %}
2863
2864 enc_class aarch64_enc_strb0(memory mem) %{
2865 MacroAssembler _masm(&cbuf);
2866 loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2867 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2868 %}
2869
2870 enc_class aarch64_enc_strh(iRegI src, memory mem) %{
2871 Register src_reg = as_Register($src$$reg);
2872 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
2873 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2874 %}
2875
2876 enc_class aarch64_enc_strh0(memory mem) %{
2877 MacroAssembler _masm(&cbuf);
2906 %}
2907
2908 enc_class aarch64_enc_str0(memory mem) %{
2909 MacroAssembler _masm(&cbuf);
2910 loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
2911 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2912 %}
2913
2914 enc_class aarch64_enc_strs(vRegF src, memory mem) %{
2915 FloatRegister src_reg = as_FloatRegister($src$$reg);
2916 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
2917 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2918 %}
2919
2920 enc_class aarch64_enc_strd(vRegD src, memory mem) %{
2921 FloatRegister src_reg = as_FloatRegister($src$$reg);
2922 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
2923 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2924 %}
2925
2926 // END Non-volatile memory access
2927
2928 // volatile loads and stores
2929
2930 enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
2931 MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2932 rscratch1, stlrb);
2933 %}
2934
2935 enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
2936 MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2937 rscratch1, stlrh);
2938 %}
2939
2940 enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
2941 MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2942 rscratch1, stlrw);
2943 %}
2944
2945
4916 constraint(ALLOC_IN_RC(float_reg));
4917 match(RegF);
4918
4919 op_cost(0);
4920 format %{ %}
4921 interface(REG_INTER);
4922 %}
4923
4924 // Double Register
4925 // Double register operands
4926 operand vRegD()
4927 %{
4928 constraint(ALLOC_IN_RC(double_reg));
4929 match(RegD);
4930
4931 op_cost(0);
4932 format %{ %}
4933 interface(REG_INTER);
4934 %}
4935
4936 operand vRegD_V0()
4937 %{
4938 constraint(ALLOC_IN_RC(v0_reg));
4939 match(RegD);
4940 op_cost(0);
4941 format %{ %}
4942 interface(REG_INTER);
4943 %}
4944
4945 operand vRegD_V1()
4946 %{
4947 constraint(ALLOC_IN_RC(v1_reg));
4948 match(RegD);
4949 op_cost(0);
4950 format %{ %}
4951 interface(REG_INTER);
4952 %}
4953
4954 operand vRegD_V2()
4955 %{
5488 less_equal(0x9, "ls");
5489 greater(0x8, "hi");
5490 overflow(0x6, "vs");
5491 no_overflow(0x7, "vc");
5492 %}
5493 %}
5494
5495 // Special operand allowing long args to int ops to be truncated for free
5496
5497 operand iRegL2I(iRegL reg) %{
5498
5499 op_cost(0);
5500
5501 match(ConvL2I reg);
5502
5503 format %{ "l2i($reg)" %}
5504
5505 interface(REG_INTER)
5506 %}
5507
5508
5509 //----------OPERAND CLASSES----------------------------------------------------
5510 // Operand Classes are groups of operands that are used as to simplify
5511 // instruction definitions by not requiring the AD writer to specify
5512 // separate instructions for every form of operand when the
5513 // instruction accepts multiple operand types with the same basic
5514 // encoding and format. The classic case of this is memory operands.
5515
5516 // memory is used to define read/write location for load/store
5517 // instruction defs. we can turn a memory op into an Address
5518
5519 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5520 indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5521
5522
5523 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5524 // operations. it allows the src to be either an iRegI or a (ConvL2I
5525 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5526 // can be elided because the 32-bit instruction will just employ the
5527 // lower 32 bits anyway.
12909
12910 // ============================================================================
12911 // This name is KNOWN by the ADLC and cannot be changed.
12912 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12913 // for this guy.
12914 instruct tlsLoadP(thread_RegP dst)
12915 %{
12916 match(Set dst (ThreadLocal));
12917
12918 ins_cost(0);
12919
12920 format %{ " -- \t// $dst=Thread::current(), empty" %}
12921
12922 size(0);
12923
12924 ins_encode( /*empty*/ );
12925
12926 ins_pipe(pipe_class_empty);
12927 %}
12928
12929
12930
12931 //----------PEEPHOLE RULES-----------------------------------------------------
12932 // These must follow all instruction definitions as they use the names
12933 // defined in the instructions definitions.
12934 //
12935 // peepmatch ( root_instr_name [preceding_instruction]* );
12936 //
12937 // peepconstraint %{
12938 // (instruction_number.operand_name relational_op instruction_number.operand_name
12939 // [, ...] );
12940 // // instruction numbers are zero-based using left to right order in peepmatch
12941 //
12942 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
12943 // // provide an instruction_number.operand_name for each operand that appears
12944 // // in the replacement instruction's match rule
12945 //
12946 // ---------VM FLAGS---------------------------------------------------------
12947 //
12948 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12949 //
|
146 // Double Registers
147
148 // The rules of ADL require that double registers be defined in pairs.
149 // Each pair must be two 32-bit values, but not necessarily a pair of
150 // single float registers. In each pair, ADLC-assigned register numbers
151 // must be adjacent, with the lower number even. Finally, when the
152 // CPU stores such a register pair to memory, the word associated with
153 // the lower ADLC-assigned number must be stored to the lower address.
154
155 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
156 // can store a vector of floating-point values: 4 * 32 bit floats or
157 // 2 * 64 bit doubles. We currently only use the first float or
158 // double element of the vector.
159
160 // For Java use, float registers v0-v15 are always treated as save-on-
161 // call (SOC) even though the platform ABI treats v8-v15 as callee save;
162 // float registers v16-v31 are SOC as per the platform spec.
163
164 reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
165 reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
166 reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
167 reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
168
169 reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
170 reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
171 reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
172 reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
173
174 reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
175 reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
176 reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
177 reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
178
179 reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
180 reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
181 reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
182 reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
183
184 reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
185 reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
186 reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
187 reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
188
189 reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
190 reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
191 reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
192 reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
193
194 reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
195 reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
196 reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
197 reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
198
199 reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
200 reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
201 reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
202 reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
203
204 reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
205 reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
206 reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
207 reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
208
209 reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
210 reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
211 reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
212 reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
213
214 reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
215 reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
216 reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
217 reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
218
219 reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
220 reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
221 reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
222 reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
223
224 reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
225 reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
226 reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
227 reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
228
229 reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
230 reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
231 reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
232 reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
233
234 reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
235 reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
236 reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
237 reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
238
239 reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
240 reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
241 reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
242 reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
243
244 reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
245 reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
246 reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
247 reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
248
249 reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
250 reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
251 reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
252 reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
253
254 reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
255 reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
256 reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
257 reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
258
259 reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
260 reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
261 reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
262 reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
263
264 reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
265 reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
266 reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
267 reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
268
269 reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
270 reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
271 reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
272 reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
273
274 reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
275 reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
276 reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
277 reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
278
279 reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
280 reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
281 reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
282 reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
283
// reg_defs for v24-v31: each 128-bit SIMD register contributes four
// 32-bit ADLC slots (Vn, Vn_H, Vn_J, Vn_K) mapped onto consecutive
// VMReg slots via next()/next(2)/next(3).  All of v16-v31 are
// save-on-call (SOC, SOC) per the platform-ABI note above.
284 reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
285 reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
286 reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
287 reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
288
289 reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
290 reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
291 reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
292 reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
293
294 reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
295 reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
296 reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
297 reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
298
299 reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
300 reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
301 reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
302 reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
303
304 reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
305 reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
306 reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
307 reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
308
309 reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
310 reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
311 reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
312 reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
313
314 reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
315 reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
316 reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
317 reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
318
319 reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
320 reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
321 reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
322 reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
323
324 // ----------------------------
325 // Special Registers
326 // ----------------------------
327
328 // The AArch64 CPSR status flag register is not directly accessible as an
329 // instruction operand. The FPSR status flag register is a system
330 // register which can be written/read using MSR/MRS but again does not
331 // appear as an operand (a code identifying the FPSR occurs as an
332 // immediate value in the instruction).
333
334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
335
336
337 // Specify priority of register selection within phases of register
338 // allocation. Highest priority is first. A useful heuristic is to
339 // give registers a low priority when they are required by machine
340 // instructions, like EAX and EDX on I486, and choose no-save registers
341 // before save-on-call, & save-on-call before save-on-entry. Registers
342 // which participate in fixed calling sequences should come last.
369 R20, R20_H,
370 R21, R21_H,
371 R22, R22_H,
372 R23, R23_H,
373 R24, R24_H,
374 R25, R25_H,
375 R26, R26_H,
376
377 // non-allocatable registers
378
379 R27, R27_H, // heapbase
380 R28, R28_H, // thread
381 R29, R29_H, // fp
382 R30, R30_H, // lr
383 R31, R31_H, // sp
384 );
385
// Float/SIMD register allocation order.  Per the priority comment
// above, earlier entries are preferred: scratch registers v16-v31
// first, then the Java argument registers v0-v7, and last v8-v15
// (callee-saved under the platform ABI -- see the note near the
// reg_defs).
386 alloc_class chunk1(
387
388     // no save
389     V16, V16_H, V16_J, V16_K,
390     V17, V17_H, V17_J, V17_K,
391     V18, V18_H, V18_J, V18_K,
392     V19, V19_H, V19_J, V19_K,
393     V20, V20_H, V20_J, V20_K,
394     V21, V21_H, V21_J, V21_K,
395     V22, V22_H, V22_J, V22_K,
396     V23, V23_H, V23_J, V23_K,
397     V24, V24_H, V24_J, V24_K,
398     V25, V25_H, V25_J, V25_K,
399     V26, V26_H, V26_J, V26_K,
400     V27, V27_H, V27_J, V27_K,
401     V28, V28_H, V28_J, V28_K,
402     V29, V29_H, V29_J, V29_K,
403     V30, V30_H, V30_J, V30_K,
404     V31, V31_H, V31_J, V31_K,
405
406     // arg registers
407     V0, V0_H, V0_J, V0_K,
408     V1, V1_H, V1_J, V1_K,
409     V2, V2_H, V2_J, V2_K,
410     V3, V3_H, V3_J, V3_K,
411     V4, V4_H, V4_J, V4_K,
412     V5, V5_H, V5_J, V5_K,
413     V6, V6_H, V6_J, V6_K,
414     V7, V7_H, V7_J, V7_K,
415
416     // non-volatiles
417     V8, V8_H, V8_J, V8_K,
418     V9, V9_H, V9_J, V9_K,
419     V10, V10_H, V10_J, V10_K,
420     V11, V11_H, V11_J, V11_K,
421     V12, V12_H, V12_J, V12_K,
422     V13, V13_H, V13_J, V13_K,
423     V14, V14_H, V14_J, V14_K,
424     V15, V15_H, V15_J, V15_K,
425 );
426
427 alloc_class chunk2(RFLAGS);
428
429 //----------Architecture Description Register Classes--------------------------
430 // Several register classes are automatically defined based upon information in
431 // this architecture description.
432 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
433 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
436 //
437
438 // Class for all 32 bit integer registers -- excludes SP which will
439 // never be used as an integer register
440 reg_class any_reg32(
441 R0,
442 R1,
443 R2,
444 R3,
848 V14, V14_H,
849 V15, V15_H,
850 V16, V16_H,
851 V17, V17_H,
852 V18, V18_H,
853 V19, V19_H,
854 V20, V20_H,
855 V21, V21_H,
856 V22, V22_H,
857 V23, V23_H,
858 V24, V24_H,
859 V25, V25_H,
860 V26, V26_H,
861 V27, V27_H,
862 V28, V28_H,
863 V29, V29_H,
864 V30, V30_H,
865 V31, V31_H
866 );
867
868 // Class for all 128bit vector registers
// Each register contributes all four 32-bit slots (Vn, Vn_H, Vn_J,
// Vn_K), i.e. the full 128-bit width -- contrast with the v*_reg
// classes below which list only two slots.
869 reg_class vectorx_reg(
870     V0, V0_H, V0_J, V0_K,
871     V1, V1_H, V1_J, V1_K,
872     V2, V2_H, V2_J, V2_K,
873     V3, V3_H, V3_J, V3_K,
874     V4, V4_H, V4_J, V4_K,
875     V5, V5_H, V5_J, V5_K,
876     V6, V6_H, V6_J, V6_K,
877     V7, V7_H, V7_J, V7_K,
878     V8, V8_H, V8_J, V8_K,
879     V9, V9_H, V9_J, V9_K,
880     V10, V10_H, V10_J, V10_K,
881     V11, V11_H, V11_J, V11_K,
882     V12, V12_H, V12_J, V12_K,
883     V13, V13_H, V13_J, V13_K,
884     V14, V14_H, V14_J, V14_K,
885     V15, V15_H, V15_J, V15_K,
886     V16, V16_H, V16_J, V16_K,
887     V17, V17_H, V17_J, V17_K,
888     V18, V18_H, V18_J, V18_K,
889     V19, V19_H, V19_J, V19_K,
890     V20, V20_H, V20_J, V20_K,
891     V21, V21_H, V21_J, V21_K,
892     V22, V22_H, V22_J, V22_K,
893     V23, V23_H, V23_J, V23_K,
894     V24, V24_H, V24_J, V24_K,
895     V25, V25_H, V25_J, V25_K,
896     V26, V26_H, V26_J, V26_K,
897     V27, V27_H, V27_J, V27_K,
898     V28, V28_H, V28_J, V28_K,
899     V29, V29_H, V29_J, V29_K,
900     V30, V30_H, V30_J, V30_K,
901     V31, V31_H, V31_J, V31_K
902 );
903
904 // Class for 128 bit register v0
// NOTE(review): these four classes list only the two low 32-bit slots
// (Vn, Vn_H) although the comments say "128 bit"; vectorx_reg above
// lists four slots per register.  They back the 64-bit vRegD_V*
// operands (match(RegD)) later in this file -- confirm whether the
// wording and the omitted _J/_K slots are intentional.
905 reg_class v0_reg(
906     V0, V0_H
907 );
908
909 // Class for 128 bit register v1
910 reg_class v1_reg(
911     V1, V1_H
912 );
913
914 // Class for 128 bit register v2
915 reg_class v2_reg(
916     V2, V2_H
917 );
918
919 // Class for 128 bit register v3
920 reg_class v3_reg(
921     V3, V3_H
922 );
923
2078 //=============================================================================
2079
2080 // Figure out which register class each register (or stack slot) belongs
2081 // in: rc_int, rc_float or rc_stack.
2082 enum RC { rc_bad, rc_int, rc_float, rc_stack };
2083
// Classify an ADLC-assigned register number for spill-copy purposes.
// The numbering is: int register slots first, then float slots, then
// the flags register, then stack slots.
2084 static enum RC rc_class(OptoReg::Name reg) {
2085
2086   if (reg == OptoReg::Bad) {
2087     return rc_bad;
2088   }
2089
2090   // we have 30 int registers * 2 halves
2091   // (rscratch1 and rscratch2 are omitted)
2092
2093   if (reg < 60) {
2094     return rc_int;
2095   }
2096
2097   // we have 32 float registers * 4 slots each (V*, _H, _J, _K = 128 slots)
2098   if (reg < 60 + 128) {
2099     return rc_float;
2100   }
2101
2102   // Between float regs & stack is the flags regs.
2103   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
2104
2105   return rc_stack;
2106 }
2107
2108 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2109 Compile* C = ra_->C;
2110
2111 // Get registers to move.
2112 OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2113 OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2114 OptoReg::Name dst_hi = ra_->get_reg_second(this);
2115 OptoReg::Name dst_lo = ra_->get_reg_first(this);
2116
2117 enum RC src_hi_rc = rc_class(src_hi);
2118 enum RC src_lo_rc = rc_class(src_lo);
2119 enum RC dst_hi_rc = rc_class(dst_hi);
2120 enum RC dst_lo_rc = rc_class(dst_lo);
2121
2122 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2123
2124 if (src_hi != OptoReg::Bad) {
2125 assert((src_lo&1)==0 && src_lo+1==src_hi &&
2126 (dst_lo&1)==0 && dst_lo+1==dst_hi,
2127 "expected aligned-adjacent pairs");
2128 }
2129
2130 if (src_lo == dst_lo && src_hi == dst_hi) {
2131 return 0; // Self copy, no move.
2132 }
2133
2134 if (bottom_type()->isa_vect() != NULL) {
2135 uint len = 4;
2136 if (cbuf) {
2137 MacroAssembler _masm(cbuf);
2138 uint ireg = ideal_reg();
2139 assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2140 assert(ireg == Op_VecX, "sanity");
2141 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2142 // stack->stack
2143 int src_offset = ra_->reg2offset(src_lo);
2144 int dst_offset = ra_->reg2offset(dst_lo);
2145 assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
2146 len = 8;
2147 if (src_offset < 512) {
2148 __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2149 } else {
2150 __ ldr(rscratch1, Address(sp, src_offset));
2151 __ ldr(rscratch2, Address(sp, src_offset+4));
2152 len += 4;
2153 }
2154 if (dst_offset < 512) {
2155 __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2156 } else {
2157 __ str(rscratch1, Address(sp, dst_offset));
2158 __ str(rscratch2, Address(sp, dst_offset+4));
2159 len += 4;
2160 }
2161 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2162 __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B,
2163 as_FloatRegister(Matcher::_regEncode[src_lo]),
2164 as_FloatRegister(Matcher::_regEncode[src_lo]));
2165 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2166 __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q,
2167 Address(sp, ra_->reg2offset(dst_lo)));
2168 } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2169 __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q,
2170 Address(sp, ra_->reg2offset(src_lo)));
2171 } else {
2172 ShouldNotReachHere();
2173 }
2174 } else if (st) {
2175 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2176 // stack->stack
2177 int src_offset = ra_->reg2offset(src_lo);
2178 int dst_offset = ra_->reg2offset(dst_lo);
2179 if (src_offset < 512) {
2180 st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset);
2181 } else {
2182 st->print("ldr rscratch1, [sp, #%d]", src_offset);
2183 st->print("\nldr rscratch2, [sp, #%d]", src_offset+4);
2184 }
2185 if (dst_offset < 512) {
2186 st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset);
2187 } else {
2188 st->print("\nstr rscratch1, [sp, #%d]", dst_offset);
2189 st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4);
2190 }
2191 st->print("\t# vector spill, stack to stack");
2192 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2193 st->print("mov %s, %s\t# vector spill, reg to reg",
2194 Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2195 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2196 st->print("str %s, [sp, #%d]\t# vector spill, reg to stack",
2197 Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2198 } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2199 st->print("ldr %s, [sp, #%d]\t# vector spill, stack to reg",
2200 Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2201 }
2202 }
2203 return len;
2204 }
2205
2206 switch (src_lo_rc) {
2207 case rc_int:
2208 if (dst_lo_rc == rc_int) { // gpr --> gpr copy
2209 if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2210 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2211 // 64 bit
2212 if (cbuf) {
2213 MacroAssembler _masm(cbuf);
2214 __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2215 as_Register(Matcher::_regEncode[src_lo]));
2216 } else if (st) {
2217 st->print("mov %s, %s\t# shuffle",
2218 Matcher::regName[dst_lo],
2219 Matcher::regName[src_lo]);
2220 }
2221 } else {
2222 // 32 bit
2223 if (cbuf) {
2224 MacroAssembler _masm(cbuf);
2225 __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2608
// Can a short (pc-relative) branch of the given rule span `offset`
// bytes?  Short-branch substitution is not implemented on this port;
// Unimplemented() presumably aborts (TODO confirm), so the return
// value below is never meaningfully produced.
2609 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
2610 {
2611   Unimplemented();
2612   return false;
2613 }
2614
2615 const bool Matcher::isSimpleConstant64(jlong value) {
2616 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2617 // Probably always true, even if a temp register is required.
2618 return true;
2619 }
2620
2621 // true just means we have fast l2f conversion
2622 const bool Matcher::convL2FSupported(void) {
2623 return true;
2624 }
2625
2626 // Vector width in bytes.
2627 const int Matcher::vector_width_in_bytes(BasicType bt) {
2628 int size = MIN2(16,(int)MaxVectorSize);
2629 // Minimum 2 values in vector
2630 if (size < 2*type2aelembytes(bt)) size = 0;
2631 // But never < 4
2632 if (size < 4) size = 0;
2633 return size;
2634 }
2635
2636 // Limits on vector size (number of elements) loaded into vector.
// Maximum element count: full vector width divided by element size.
2637 const int Matcher::max_vector_size(const BasicType bt) {
2638   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2639 }
// Minimum element count: identical to the maximum, i.e. only one
// (full 128-bit) vector size is supported for now -- see the
// commented-out alternative below.
2640 const int Matcher::min_vector_size(const BasicType bt) {
2641 //  return (type2aelembytes(bt) == 1) ? 4 : 2;
2642   // For the moment, only support 1 vector size, 128 bits
2643   return max_vector_size(bt);
2644 }
2645
2646 // Vector ideal reg.
// Always Op_VecX regardless of len: only 128-bit vectors are
// supported (see min_vector_size above).
2647 const int Matcher::vector_ideal_reg(int len) {
2648   return Op_VecX;
2649 }
2650
2651 // Only the lowest bits of the shift-count register are used for vector
2651 // shift counts (comment inherited from the x86 port, which said "xmm").
2652 const int Matcher::vector_shift_count_ideal_reg(int size) {
2653   return Op_VecX;
2654 }
2655
2656 // AES support not yet implemented
2657 const bool Matcher::pass_original_key_for_aes() {
2658   return false;
2659 }
2660
2661 // Are misaligned vector loads/stores permitted?  Disabled for now
2661 // (the original comment, "x86 supports misaligned vectors
2661 // store/load", was inherited from the x86 port).
2662 const bool Matcher::misaligned_vectors_ok() {
2663   // TODO fixme
2664   // return !AlignVector; // can be changed by flag
2665   return false;
2666 }
2667
2668 // false => size gets scaled to BytesPerLong, ok.
2669 const bool Matcher::init_array_count_is_in_bytes = false;
2670
2671 // Threshold size for cleararray: arrays at or below 18 longs are
2671 // cleared inline rather than via a runtime call.
2672 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2673
2844 case T_FLOAT:
2845 rtype = MacroAssembler::ret_type_float;
2846 break;
2847 case T_DOUBLE:
2848 rtype = MacroAssembler::ret_type_double;
2849 break;
2850 }
2851 }
2852
// Emit a volatile (acquire/release-style) memory access.  The
// addressing mode must be a plain base register: the guarantees reject
// any index, scale or displacement, since the INSN used (stlr* family
// below) only takes [base].  The _masm declaration is deliberately
// outside the braces so the __ macro binds in the enclosing enc_class.
// NOTE(review): the SCRATCH parameter is unused here -- confirm whether
// callers rely on it.
2853 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \
2854   MacroAssembler _masm(&cbuf); \
2855   { \
2856     guarantee(INDEX == -1, "mode not permitted for volatile"); \
2857     guarantee(DISP == 0, "mode not permitted for volatile"); \
2858     guarantee(SCALE == 0, "mode not permitted for volatile"); \
2859     __ INSN(REG, as_Register(BASE)); \
2860   }
2861
// Member-function-pointer types used by the loadStore helpers below:
// scalar GPR accesses, scalar FP accesses, and SIMD accesses that also
// take a register-variant selector.
2862 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2863 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2864 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
2865                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
2866
2867 // Used for all non-volatile memory accesses. The use of
2868 // $mem->opcode() to discover whether this pattern uses sign-extended
2869 // offsets is something of a kludge.
2870 static void loadStore(MacroAssembler masm, mem_insn insn,
2871 Register reg, int opcode,
2872 Register base, int index, int size, int disp)
2873 {
2874 Address::extend scale;
2875
2876 // Hooboy, this is fugly. We need a way to communicate to the
2877 // encoder that the index needs to be sign extended, so we have to
2878 // enumerate all the cases.
2879 switch (opcode) {
2880 case INDINDEXSCALEDOFFSETI2L:
2881 case INDINDEXSCALEDI2L:
2882 case INDINDEXSCALEDOFFSETI2LN:
2883 case INDINDEXSCALEDI2LN:
2884 case INDINDEXOFFSETI2L:
2885 case INDINDEXOFFSETI2LN:
2913 case INDINDEXSCALEDOFFSETI2LN:
2914 case INDINDEXSCALEDI2LN:
2915 scale = Address::sxtw(size);
2916 break;
2917 default:
2918 scale = Address::lsl(size);
2919 }
2920
2921 if (index == -1) {
2922 (masm.*insn)(reg, Address(base, disp));
2923 } else {
2924 if (disp == 0) {
2925 (masm.*insn)(reg, Address(base, as_Register(index), scale));
2926 } else {
2927 masm.lea(rscratch1, Address(base, disp));
2928 (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2929 }
2930 }
2931 }
2932
// Vector/SIMD overload of loadStore.  Supports only [base, #disp] or
// [base, index << size] with zero displacement; a combined index plus
// displacement is rejected by the assert.  The MacroAssembler is
// passed by value, matching the scalar overload above.
2933 static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2934                       FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2935                       int opcode, Register base, int index, int size, int disp)
2936 {
2937   if (index == -1) {
2938     (masm.*insn)(reg, T, Address(base, disp));
2939   } else {
2940     assert(disp == 0, "unsupported address mode");
2941     (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2942   }
2943 }
2944
2945 %}
2946
2947
2948
2949 //----------ENCODING BLOCK-----------------------------------------------------
2950 // This block specifies the encoding classes used by the compiler to
2951 // output byte streams. Encoding classes are parameterized macros
2952 // used by Machine Instruction Nodes in order to generate the bit
2953 // encoding of the instruction. Operands specify their base encoding
2954 // interface with the interface keyword. There are currently
2955 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2956 // COND_INTER. REG_INTER causes an operand to generate a function
2957 // which returns its register number when queried. CONST_INTER causes
2958 // an operand to generate a function which returns the value of the
2959 // constant when queried. MEMORY_INTER causes an operand to generate
2960 // four functions which return the Base Register, the Index Register,
2961 // the Scale Value, and the Offset Value of the operand when queried.
2962 // COND_INTER causes an operand to generate six functions which return
2963 // the encoding code (ie - encoding bits for the instruction)
2964 // associated with each basic boolean condition for a conditional
3056 %}
3057
// Non-volatile memory-access encodings (loads, then the first stores).
// Each defers to loadStore(), which decodes the addressing mode from
// $mem->opcode(); the *0 variants store the zero register.
3058   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3059     Register dst_reg = as_Register($dst$$reg);
3060     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3061                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3062   %}
3063
3064   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3065     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3066     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3067                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3068   %}
3069
3070   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3071     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3072     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3073                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3074   %}
3075
3076   enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{
3077     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3078     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3079        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3080   %}
3081
3082   enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{
3083     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3084     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3085        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3086   %}
3087
3088   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3089     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3090     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3091        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3092   %}
3093
3094   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3095     Register src_reg = as_Register($src$$reg);
3096     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3097                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3098   %}
3099
3100   enc_class aarch64_enc_strb0(memory mem) %{
3101     MacroAssembler _masm(&cbuf);
3102     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3103                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3104   %}
3105
3106   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
3107     Register src_reg = as_Register($src$$reg);
3108     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
3109                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3110   %}
3111
3112 enc_class aarch64_enc_strh0(memory mem) %{
3113 MacroAssembler _masm(&cbuf);
3142 %}
3143
// Remaining non-volatile store encodings: 64-bit zero store, scalar FP
// stores, and vector stores at S/D/Q width.  All defer to loadStore().
3144   enc_class aarch64_enc_str0(memory mem) %{
3145     MacroAssembler _masm(&cbuf);
3146     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
3147                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3148   %}
3149
3150   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
3151     FloatRegister src_reg = as_FloatRegister($src$$reg);
3152     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
3153                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3154   %}
3155
3156   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
3157     FloatRegister src_reg = as_FloatRegister($src$$reg);
3158     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
3159                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3160   %}
3161
3162   enc_class aarch64_enc_strvS(vecX src, memory mem) %{
3163     FloatRegister src_reg = as_FloatRegister($src$$reg);
3164     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
3165        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3166   %}
3167
3168   enc_class aarch64_enc_strvD(vecX src, memory mem) %{
3169     FloatRegister src_reg = as_FloatRegister($src$$reg);
3170     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
3171        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3172   %}
3173
3174   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
3175     FloatRegister src_reg = as_FloatRegister($src$$reg);
3176     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
3177        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3178   %}
3179
3180 // END Non-volatile memory access
3181
3182 // volatile loads and stores
3183
// Volatile store encodings.  MOV_VOLATILE enforces plain base-register
// addressing (no index/scale/disp) and emits the given store-release
// instruction at byte/halfword/word width.
3184   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
3185     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3186                  rscratch1, stlrb);
3187   %}
3188
3189   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
3190     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3191                  rscratch1, stlrh);
3192   %}
3193
3194   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
3195     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3196                  rscratch1, stlrw);
3197   %}
3198
3199
5170 constraint(ALLOC_IN_RC(float_reg));
5171 match(RegF);
5172
5173 op_cost(0);
5174 format %{ %}
5175 interface(REG_INTER);
5176 %}
5177
5178 // Double Register
5179 // Double register operands
5180 operand vRegD()
5181 %{
5182   constraint(ALLOC_IN_RC(double_reg));
5183   match(RegD);
5184
5185   op_cost(0);
5186   format %{ %}
5187   interface(REG_INTER);
5188 %}
5189
// 128-bit vector operand, allocated from vectorx_reg (all four slots
// of each SIMD register).
5190 operand vecX()
5191 %{
5192   constraint(ALLOC_IN_RC(vectorx_reg));
5193   match(VecX);
5194
5195   op_cost(0);
5196   format %{ %}
5197   interface(REG_INTER);
5198 %}
5199
// Double operand pinned to register v0 (allocated from the
// single-register class v0_reg).
5200 operand vRegD_V0()
5201 %{
5202   constraint(ALLOC_IN_RC(v0_reg));
5203   match(RegD);
5204   op_cost(0);
5205   format %{ %}
5206   interface(REG_INTER);
5207 %}
5208
// Double operand pinned to register v1.
5209 operand vRegD_V1()
5210 %{
5211   constraint(ALLOC_IN_RC(v1_reg));
5212   match(RegD);
5213   op_cost(0);
5214   format %{ %}
5215   interface(REG_INTER);
5216 %}
5217
5218 operand vRegD_V2()
5219 %{
5752 less_equal(0x9, "ls");
5753 greater(0x8, "hi");
5754 overflow(0x6, "vs");
5755 no_overflow(0x7, "vc");
5756 %}
5757 %}
5758
5759 // Special operand allowing long args to int ops to be truncated for free
5760
// NOTE(review): interface(REG_INTER) below has no trailing semicolon,
// unlike the other operand definitions in this file -- confirm ADLC
// accepts both forms.
5761 operand iRegL2I(iRegL reg) %{
5762
5763   op_cost(0);
5764
5765   match(ConvL2I reg);
5766
5767   format %{ "l2i($reg)" %}
5768
5769   interface(REG_INTER)
5770 %}
5771
5772 opclass vmem(indirect, indIndex, indOffI, indOffL);
5773
5774 //----------OPERAND CLASSES----------------------------------------------------
5775 // Operand Classes are groups of operands that are used to simplify
5776 // instruction definitions by not requiring the AD writer to specify
5777 // separate instructions for every form of operand when the
5778 // instruction accepts multiple operand types with the same basic
5779 // encoding and format. The classic case of this is memory operands.
5780
5781 // memory is used to define read/write location for load/store
5782 // instruction defs. we can turn a memory op into an Address
5783
// memory groups every scalar addressing mode, including the
// narrow-oop (N-suffixed) variants.
5784 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5785                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5786
5787
5788 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5789 // operations. it allows the src to be either an iRegI or a (ConvL2I
5790 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5791 // can be elided because the 32-bit instruction will just employ the
5792 // lower 32 bits anyway.
13174
13175 // ============================================================================
13176 // This name is KNOWN by the ADLC and cannot be changed.
13177 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13178 // for this guy.
// size(0)/empty encode: no code is emitted because the thread pointer
// already lives in the dedicated thread register (r28, marked "thread"
// in the allocation classes above) to which thread_RegP is bound.
13179 instruct tlsLoadP(thread_RegP dst)
13180 %{
13181   match(Set dst (ThreadLocal));
13182
13183   ins_cost(0);
13184
13185   format %{ " -- \t// $dst=Thread::current(), empty" %}
13186
13187   size(0);
13188
13189   ins_encode( /*empty*/ );
13190
13191   ins_pipe(pipe_class_empty);
13192 %}
13193
13194 // ====================VECTOR INSTRUCTIONS=====================================
13195
// Vector loads: selected on memory_size(), they use the S/D/Q variants
// of the vector ldr encoding into a vecX register.
13196 // Load vector (32 bits)
13197 instruct loadV4(vecX dst, vmem mem)
13198 %{
13199   predicate(n->as_LoadVector()->memory_size() == 4);
13200   match(Set dst (LoadVector mem));
13201   ins_cost(4 * INSN_COST);
13202   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
13203   ins_encode( aarch64_enc_ldrvS(dst, mem) );
13204   ins_pipe(pipe_class_memory);
13205 %}
13206
13207 // Load vector (64 bits)
13208 instruct loadV8(vecX dst, vmem mem)
13209 %{
13210   predicate(n->as_LoadVector()->memory_size() == 8);
13211   match(Set dst (LoadVector mem));
13212   ins_cost(4 * INSN_COST);
13213   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
13214   ins_encode( aarch64_enc_ldrvD(dst, mem) );
13215   ins_pipe(pipe_class_memory);
13216 %}
13217
13218 // Load Vector (128 bits)
13219 instruct loadV16(vecX dst, vmem mem)
13220 %{
13221   predicate(n->as_LoadVector()->memory_size() == 16);
13222   match(Set dst (LoadVector mem));
13223   ins_cost(4 * INSN_COST);
13224   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
13225   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
13226   ins_pipe(pipe_class_memory);
13227 %}
13228
// Vector stores: mirror images of the loads above, using the S/D/Q
// variants of the vector str encoding.
13229 // Store Vector (32 bits)
13230 instruct storeV4(vecX src, vmem mem)
13231 %{
13232   predicate(n->as_StoreVector()->memory_size() == 4);
13233   match(Set mem (StoreVector mem src));
13234   ins_cost(4 * INSN_COST);
13235   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
13236   ins_encode( aarch64_enc_strvS(src, mem) );
13237   ins_pipe(pipe_class_memory);
13238 %}
13239
13240 // Store Vector (64 bits)
13241 instruct storeV8(vecX src, vmem mem)
13242 %{
13243   predicate(n->as_StoreVector()->memory_size() == 8);
13244   match(Set mem (StoreVector mem src));
13245   ins_cost(4 * INSN_COST);
13246   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
13247   ins_encode( aarch64_enc_strvD(src, mem) );
13248   ins_pipe(pipe_class_memory);
13249 %}
13250
13251 // Store Vector (128 bits)
13252 instruct storeV16(vecX src, vmem mem)
13253 %{
13254   predicate(n->as_StoreVector()->memory_size() == 16);
13255   match(Set mem (StoreVector mem src));
13256   ins_cost(4 * INSN_COST);
13257   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
13258   ins_encode( aarch64_enc_strvQ(src, mem) );
13259   ins_pipe(pipe_class_memory);
13260 %}
13261
// Replicate (broadcast) instructions: dup a scalar register or movi an
// immediate into every lane of a 128-bit vector.
13262 instruct replicate16B(vecX dst, iRegIorL2I src)
13263 %{
13264   match(Set dst (ReplicateB src));
13265   ins_cost(INSN_COST);
13266   format %{ "dup  $dst, $src\t# vector (16B)" %}
13267   ins_encode %{
13268     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
13269   %}
13270   ins_pipe(pipe_class_default);
13271 %}
13272
13273 instruct replicate16B_imm(vecX dst, immI con)
13274 %{
13275   match(Set dst (ReplicateB con));
13276   ins_cost(INSN_COST);
13277   format %{ "movi  $dst, $con\t# vector(16B)" %}
13278   ins_encode %{
13279     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
13280   %}
13281   ins_pipe(pipe_class_default);
13282 %}
13283
13284 instruct replicate8S(vecX dst, iRegIorL2I src)
13285 %{
13286   match(Set dst (ReplicateS src));
13287   ins_cost(INSN_COST);
13288   format %{ "dup  $dst, $src\t# vector (8S)" %}
13289   ins_encode %{
13290     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
13291   %}
13292   ins_pipe(pipe_class_default);
13293 %}
13294
13295 instruct replicate8S_imm(vecX dst, immI con)
13296 %{
13297   match(Set dst (ReplicateS con));
13298   ins_cost(INSN_COST);
13299   format %{ "movi  $dst, $con\t# vector(8H)" %}
13300   ins_encode %{
13301     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
13302   %}
13303   ins_pipe(pipe_class_default);
13304 %}
13305
13306 instruct replicate4I(vecX dst, iRegIorL2I src)
13307 %{
13308   match(Set dst (ReplicateI src));
13309   ins_cost(INSN_COST);
13310   format %{ "dup  $dst, $src\t# vector (4I)" %}
13311   ins_encode %{
13312     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
13313   %}
13314   ins_pipe(pipe_class_default);
13315 %}
13316
13317 instruct replicate4I_imm(vecX dst, immI con)
13318 %{
13319   match(Set dst (ReplicateI con));
13320   ins_cost(INSN_COST);
13321   format %{ "movi  $dst, $con\t# vector(4I)" %}
13322   ins_encode %{
13323     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
13324   %}
13325   ins_pipe(pipe_class_default);
13326 %}
13327
13328 instruct replicate2L(vecX dst, iRegL src)
13329 %{
13330   match(Set dst (ReplicateL src));
13331   ins_cost(INSN_COST);
13332   format %{ "dup  $dst, $src\t# vector (2L)" %}
13333   ins_encode %{
13334     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
13335   %}
13336   ins_pipe(pipe_class_default);
13337 %}
13338
// NOTE(review): despite the name, this matches (ReplicateI zero) --
// presumably because an all-zero bit pattern is identical for any
// element size, and the eor below clears the full 128 bits.  Confirm
// whether a ReplicateL-of-zero form is matched elsewhere.
13339 instruct replicate2L_zero(vecX dst, immI0 zero)
13340 %{
13341   match(Set dst (ReplicateI zero));
13342   ins_cost(INSN_COST);
13343   format %{ "movi  $dst, $zero\t# vector(4I)" %}
13344   ins_encode %{
13345     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13346            as_FloatRegister($dst$$reg),
13347            as_FloatRegister($dst$$reg));
13348   %}
13349   ins_pipe(pipe_class_default);
13350 %}
13351
13352 instruct replicate4F(vecX dst, vRegF src)
13353 %{
13354   match(Set dst (ReplicateF src));
13355   ins_cost(INSN_COST);
13356   format %{ "dup  $dst, $src\t# vector (4F)" %}
13357   ins_encode %{
13358     __ dup(as_FloatRegister($dst$$reg), __ T4S,
13359            as_FloatRegister($src$$reg));
13360   %}
13361   ins_pipe(pipe_class_default);
13362 %}
13363
13364 instruct replicate2D(vecX dst, vRegD src)
13365 %{
13366   match(Set dst (ReplicateD src));
13367   ins_cost(INSN_COST);
13368   format %{ "dup  $dst, $src\t# vector (2D)" %}
13369   ins_encode %{
13370     __ dup(as_FloatRegister($dst$$reg), __ T2D,
13371            as_FloatRegister($src$$reg));
13372   %}
13373   ins_pipe(pipe_class_default);
13374 %}
13375
13376 // ====================REDUCTION ARITHMETIC====================================
13377
// Reduction instructions: fold a 4-lane vector into a scalar, then
// combine with the scalar input src1.
// reduce_add4I: addv sums all four lanes, umov extracts the scalar,
// addw folds in src1.
13378 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13379 %{
13380   match(Set dst (AddReductionVI src1 src2));
13381   ins_cost(INSN_COST);
13382   effect(TEMP tmp, TEMP tmp2);
13383   format %{ "addv  $tmp, T4S, $src2\n\t"
13384             "umov  $tmp2, $tmp, S, 0\n\t"
13385             "addw  $dst, $tmp2, $src1\t add reduction4i"
13386   %}
13387   ins_encode %{
13388     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
13389             as_FloatRegister($src2$$reg));
13390     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13391     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
13392   %}
13393   ins_pipe(pipe_class_default);
13394 %}
13395
// reduce_mul4I: ins copies the high 64 bits of src2 down, mulv
// multiplies lane-pairs, then the two remaining partial products are
// extracted with umov and folded in with two scalar muls.
// NOTE(review): the format string's last line ends with a stray "\n\t"
// -- harmless in debug output, but likely unintentional.
13396 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13397 %{
13398   match(Set dst (MulReductionVI src1 src2));
13399   ins_cost(INSN_COST);
13400   effect(TEMP tmp, TEMP tmp2, TEMP dst);
13401   format %{ "ins   $tmp, $src2, 0, 1\n\t"
13402             "mul   $tmp, $tmp, $src2\n\t"
13403             "umov  $tmp2, $tmp, S, 0\n\t"
13404             "mul   $dst, $tmp2, $src1\n\t"
13405             "umov  $tmp2, $tmp, S, 1\n\t"
13406             "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
13407   %}
13408   ins_encode %{
13409     __ ins(as_FloatRegister($tmp$$reg), __ D,
13410            as_FloatRegister($src2$$reg), 0, 1);
13411     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
13412             as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
13413     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13414     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
13415     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
13416     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
13417   %}
13418   ins_pipe(pipe_class_default);
13419 %}
13420
// reduce_add4F: strictly ordered float add -- each lane is moved into
// position with ins and accumulated with fadds, preserving IEEE
// evaluation order.
13421 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13422 %{
13423   match(Set dst (AddReductionVF src1 src2));
13424   ins_cost(INSN_COST);
13425   effect(TEMP tmp, TEMP dst);
13426   format %{ "fadds $dst, $src1, $src2\n\t"
13427             "ins   $tmp, S, $src2, 0, 1\n\t"
13428             "fadds $dst, $dst, $tmp\n\t"
13429             "ins   $tmp, S, $src2, 0, 2\n\t"
13430             "fadds $dst, $dst, $tmp\n\t"
13431             "ins   $tmp, S, $src2, 0, 3\n\t"
13432             "fadds $dst, $dst, $tmp\t add reduction4f"
13433   %}
13434   ins_encode %{
13435     __ fadds(as_FloatRegister($dst$$reg),
13436              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13437     __ ins(as_FloatRegister($tmp$$reg), __ S,
13438            as_FloatRegister($src2$$reg), 0, 1);
13439     __ fadds(as_FloatRegister($dst$$reg),
13440              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13441     __ ins(as_FloatRegister($tmp$$reg), __ S,
13442            as_FloatRegister($src2$$reg), 0, 2);
13443     __ fadds(as_FloatRegister($dst$$reg),
13444              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13445     __ ins(as_FloatRegister($tmp$$reg), __ S,
13446            as_FloatRegister($src2$$reg), 0, 3);
13447     __ fadds(as_FloatRegister($dst$$reg),
13448              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13449   %}
13450   ins_pipe(pipe_class_default);
13451 %}
13452
// Multiply-reduction of a 4-float vector into a scalar float:
// dst = src1 * src2[0], then fold in src2[1..3] one lane at a time
// (each lane moved into tmp[0] via ins before the scalar fmuls),
// keeping strict lane order for FP semantics.
// Fix: the disassembly comment previously said "add reduction4f";
// this is the MUL reduction, so it now reads "mul reduction4f".
13453 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13454 %{
13455 match(Set dst (MulReductionVF src1 src2));
13456 ins_cost(INSN_COST);
13457 effect(TEMP tmp, TEMP dst);
13458 format %{ "fmuls $dst, $src1, $src2\n\t"
13459 "ins $tmp, S, $src2, 0, 1\n\t"
13460 "fmuls $dst, $dst, $tmp\n\t"
13461 "ins $tmp, S, $src2, 0, 2\n\t"
13462 "fmuls $dst, $dst, $tmp\n\t"
13463 "ins $tmp, S, $src2, 0, 3\n\t"
13464 "fmuls $dst, $dst, $tmp\t mul reduction4f"
13465 %}
13466 ins_encode %{
13467 __ fmuls(as_FloatRegister($dst$$reg),
13468 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13469 __ ins(as_FloatRegister($tmp$$reg), __ S,
13470 as_FloatRegister($src2$$reg), 0, 1);
13471 __ fmuls(as_FloatRegister($dst$$reg),
13472 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13473 __ ins(as_FloatRegister($tmp$$reg), __ S,
13474 as_FloatRegister($src2$$reg), 0, 2);
13475 __ fmuls(as_FloatRegister($dst$$reg),
13476 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13477 __ ins(as_FloatRegister($tmp$$reg), __ S,
13478 as_FloatRegister($src2$$reg), 0, 3);
13479 __ fmuls(as_FloatRegister($dst$$reg),
13480 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13481 %}
13482 ins_pipe(pipe_class_default);
13483 %}
13484
// Add-reduction of a 2-double vector into a scalar double:
// dst = src1 + src2[0]; copy src2's upper lane to tmp[0] (ins ... D);
// dst += tmp. Scalar faddd keeps the lane order fixed (FP addition is
// not associative).
13485 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13486 %{
13487 match(Set dst (AddReductionVD src1 src2));
13488 ins_cost(INSN_COST);
13489 effect(TEMP tmp, TEMP dst);
13490 format %{ "faddd $dst, $src1, $src2\n\t"
13491 "ins $tmp, D, $src2, 0, 1\n\t"
13492 "faddd $dst, $dst, $tmp\t add reduction2d"
13493 %}
13494 ins_encode %{
13495 __ faddd(as_FloatRegister($dst$$reg),
13496 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13497 __ ins(as_FloatRegister($tmp$$reg), __ D,
13498 as_FloatRegister($src2$$reg), 0, 1);
13499 __ faddd(as_FloatRegister($dst$$reg),
13500 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13501 %}
13502 ins_pipe(pipe_class_default);
13503 %}
13504
// Multiply-reduction of a 2-double vector into a scalar double:
// dst = src1 * src2[0]; copy src2's upper lane to tmp[0] (ins ... D);
// dst *= tmp.
// Fix: the disassembly comment previously said "add reduction2d";
// this is the MUL reduction, so it now reads "mul reduction2d".
13505 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13506 %{
13507 match(Set dst (MulReductionVD src1 src2));
13508 ins_cost(INSN_COST);
13509 effect(TEMP tmp, TEMP dst);
13510 format %{ "fmuld $dst, $src1, $src2\n\t"
13511 "ins $tmp, D, $src2, 0, 1\n\t"
13512 "fmuld $dst, $dst, $tmp\t mul reduction2d"
13513 %}
13514 ins_encode %{
13515 __ fmuld(as_FloatRegister($dst$$reg),
13516 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
13517 __ ins(as_FloatRegister($tmp$$reg), __ D,
13518 as_FloatRegister($src2$$reg), 0, 1);
13519 __ fmuld(as_FloatRegister($dst$$reg),
13520 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13521 %}
13522 ins_pipe(pipe_class_default);
13523 %}
13524
13525 // ====================VECTOR ARITHMETIC=======================================
13526
13527 // --------------------------------- ADD --------------------------------------
13528
// Vector integer add, 16 x 8-bit lanes (ADD Vd.16B, Vn.16B, Vm.16B).
13529 instruct vadd16B(vecX dst, vecX src1, vecX src2)
13530 %{
13531 match(Set dst (AddVB src1 src2));
13532 ins_cost(INSN_COST);
13533 format %{ "addv $dst,$src1,$src2\t# vector (16B)" %}
13534 ins_encode %{
13535 __ addv(as_FloatRegister($dst$$reg), __ T16B,
13536 as_FloatRegister($src1$$reg),
13537 as_FloatRegister($src2$$reg));
13538 %}
13539 ins_pipe(pipe_class_default);
13540 %}
13541
// Vector integer add, 8 x 16-bit lanes (ADD Vd.8H, Vn.8H, Vm.8H).
13542 instruct vadd8S(vecX dst, vecX src1, vecX src2)
13543 %{
13544 match(Set dst (AddVS src1 src2));
13545 ins_cost(INSN_COST);
13546 format %{ "addv $dst,$src1,$src2\t# vector (8H)" %}
13547 ins_encode %{
13548 __ addv(as_FloatRegister($dst$$reg), __ T8H,
13549 as_FloatRegister($src1$$reg),
13550 as_FloatRegister($src2$$reg));
13551 %}
13552 ins_pipe(pipe_class_default);
13553 %}
13554
// Vector integer add, 4 x 32-bit lanes (ADD Vd.4S, Vn.4S, Vm.4S).
13555 instruct vadd4I(vecX dst, vecX src1, vecX src2)
13556 %{
13557 match(Set dst (AddVI src1 src2));
13558 ins_cost(INSN_COST);
13559 format %{ "addv $dst,$src1,$src2\t# vector (4S)" %}
13560 ins_encode %{
13561 __ addv(as_FloatRegister($dst$$reg), __ T4S,
13562 as_FloatRegister($src1$$reg),
13563 as_FloatRegister($src2$$reg));
13564 %}
13565 ins_pipe(pipe_class_default);
13566 %}
13567
// Vector integer add, 2 x 64-bit lanes (ADD Vd.2D, Vn.2D, Vm.2D).
13568 instruct vadd2L(vecX dst, vecX src1, vecX src2)
13569 %{
13570 match(Set dst (AddVL src1 src2));
13571 ins_cost(INSN_COST);
13572 format %{ "addv $dst,$src1,$src2\t# vector (2L)" %}
13573 ins_encode %{
13574 __ addv(as_FloatRegister($dst$$reg), __ T2D,
13575 as_FloatRegister($src1$$reg),
13576 as_FloatRegister($src2$$reg));
13577 %}
13578 ins_pipe(pipe_class_default);
13579 %}
13580
// Vector FP add, 4 x single-precision lanes (FADD Vd.4S, Vn.4S, Vm.4S).
13581 instruct vadd4F(vecX dst, vecX src1, vecX src2)
13582 %{
13583 match(Set dst (AddVF src1 src2));
13584 ins_cost(INSN_COST);
13585 format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %}
13586 ins_encode %{
13587 __ fadd(as_FloatRegister($dst$$reg), __ T4S,
13588 as_FloatRegister($src1$$reg),
13589 as_FloatRegister($src2$$reg));
13590 %}
13591 ins_pipe(pipe_class_default);
13592 %}
13593
// Vector FP add, 2 x double-precision lanes (FADD Vd.2D, Vn.2D, Vm.2D).
13594 instruct vadd2D(vecX dst, vecX src1, vecX src2)
13595 %{
13596 match(Set dst (AddVD src1 src2));
13597 ins_cost(INSN_COST);
13598 format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %}
13599 ins_encode %{
13600 __ fadd(as_FloatRegister($dst$$reg), __ T2D,
13601 as_FloatRegister($src1$$reg),
13602 as_FloatRegister($src2$$reg));
13603 %}
13604 ins_pipe(pipe_class_default);
13605 %}
13606
13607 // --------------------------------- SUB --------------------------------------
13608
// Vector integer subtract, 16 x 8-bit lanes (SUB Vd.16B, Vn.16B, Vm.16B).
13609 instruct vsub16B(vecX dst, vecX src1, vecX src2)
13610 %{
13611 match(Set dst (SubVB src1 src2));
13612 ins_cost(INSN_COST);
13613 format %{ "subv $dst,$src1,$src2\t# vector (16B)" %}
13614 ins_encode %{
13615 __ subv(as_FloatRegister($dst$$reg), __ T16B,
13616 as_FloatRegister($src1$$reg),
13617 as_FloatRegister($src2$$reg));
13618 %}
13619 ins_pipe(pipe_class_default);
13620 %}
13621
// Vector integer subtract, 8 x 16-bit lanes (SUB Vd.8H, Vn.8H, Vm.8H).
13622 instruct vsub8S(vecX dst, vecX src1, vecX src2)
13623 %{
13624 match(Set dst (SubVS src1 src2));
13625 ins_cost(INSN_COST);
13626 format %{ "subv $dst,$src1,$src2\t# vector (8H)" %}
13627 ins_encode %{
13628 __ subv(as_FloatRegister($dst$$reg), __ T8H,
13629 as_FloatRegister($src1$$reg),
13630 as_FloatRegister($src2$$reg));
13631 %}
13632 ins_pipe(pipe_class_default);
13633 %}
13634
// Vector integer subtract, 4 x 32-bit lanes (SUB Vd.4S, Vn.4S, Vm.4S).
13635 instruct vsub4I(vecX dst, vecX src1, vecX src2)
13636 %{
13637 match(Set dst (SubVI src1 src2));
13638 ins_cost(INSN_COST);
13639 format %{ "subv $dst,$src1,$src2\t# vector (4S)" %}
13640 ins_encode %{
13641 __ subv(as_FloatRegister($dst$$reg), __ T4S,
13642 as_FloatRegister($src1$$reg),
13643 as_FloatRegister($src2$$reg));
13644 %}
13645 ins_pipe(pipe_class_default);
13646 %}
13647
// Vector integer subtract, 2 x 64-bit lanes (SUB Vd.2D, Vn.2D, Vm.2D).
13648 instruct vsub2L(vecX dst, vecX src1, vecX src2)
13649 %{
13650 match(Set dst (SubVL src1 src2));
13651 ins_cost(INSN_COST);
13652 format %{ "subv $dst,$src1,$src2\t# vector (2L)" %}
13653 ins_encode %{
13654 __ subv(as_FloatRegister($dst$$reg), __ T2D,
13655 as_FloatRegister($src1$$reg),
13656 as_FloatRegister($src2$$reg));
13657 %}
13658 ins_pipe(pipe_class_default);
13659 %}
13660
// Vector FP subtract, 4 x single-precision lanes (FSUB Vd.4S, Vn.4S, Vm.4S).
13661 instruct vsub4F(vecX dst, vecX src1, vecX src2)
13662 %{
13663 match(Set dst (SubVF src1 src2));
13664 ins_cost(INSN_COST);
13665 format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %}
13666 ins_encode %{
13667 __ fsub(as_FloatRegister($dst$$reg), __ T4S,
13668 as_FloatRegister($src1$$reg),
13669 as_FloatRegister($src2$$reg));
13670 %}
13671 ins_pipe(pipe_class_default);
13672 %}
13673
// Vector FP subtract, 2 x double-precision lanes (FSUB Vd.2D, Vn.2D, Vm.2D).
13674 instruct vsub2D(vecX dst, vecX src1, vecX src2)
13675 %{
13676 match(Set dst (SubVD src1 src2));
13677 ins_cost(INSN_COST);
13678 format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %}
13679 ins_encode %{
13680 __ fsub(as_FloatRegister($dst$$reg), __ T2D,
13681 as_FloatRegister($src1$$reg),
13682 as_FloatRegister($src2$$reg));
13683 %}
13684 ins_pipe(pipe_class_default);
13685 %}
13686
13687 // --------------------------------- MUL --------------------------------------
13688
// Vector integer multiply, 8 x 16-bit lanes (MUL Vd.8H, Vn.8H, Vm.8H).
// Note: no 2D MUL exists in AArch64 SIMD, hence no vmul2L rule here.
13689 instruct vmul8S(vecX dst, vecX src1, vecX src2)
13690 %{
13691 match(Set dst (MulVS src1 src2));
13692 ins_cost(INSN_COST);
13693 format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %}
13694 ins_encode %{
13695 __ mulv(as_FloatRegister($dst$$reg), __ T8H,
13696 as_FloatRegister($src1$$reg),
13697 as_FloatRegister($src2$$reg));
13698 %}
13699 ins_pipe(pipe_class_default);
13700 %}
13701
// Vector integer multiply, 4 x 32-bit lanes (MUL Vd.4S, Vn.4S, Vm.4S).
13702 instruct vmul4I(vecX dst, vecX src1, vecX src2)
13703 %{
13704 match(Set dst (MulVI src1 src2));
13705 ins_cost(INSN_COST);
13706 format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %}
13707 ins_encode %{
13708 __ mulv(as_FloatRegister($dst$$reg), __ T4S,
13709 as_FloatRegister($src1$$reg),
13710 as_FloatRegister($src2$$reg));
13711 %}
13712 ins_pipe(pipe_class_default);
13713 %}
13714
// Vector FP multiply, 4 x single-precision lanes (FMUL Vd.4S, Vn.4S, Vm.4S).
13715 instruct vmul4F(vecX dst, vecX src1, vecX src2)
13716 %{
13717 match(Set dst (MulVF src1 src2));
13718 ins_cost(INSN_COST);
13719 format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %}
13720 ins_encode %{
13721 __ fmul(as_FloatRegister($dst$$reg), __ T4S,
13722 as_FloatRegister($src1$$reg),
13723 as_FloatRegister($src2$$reg));
13724 %}
13725 ins_pipe(pipe_class_default);
13726 %}
13727
// Vector FP multiply, 2 x double-precision lanes (FMUL Vd.2D, Vn.2D, Vm.2D).
13728 instruct vmul2D(vecX dst, vecX src1, vecX src2)
13729 %{
13730 match(Set dst (MulVD src1 src2));
13731 ins_cost(INSN_COST);
13732 format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %}
13733 ins_encode %{
13734 __ fmul(as_FloatRegister($dst$$reg), __ T2D,
13735 as_FloatRegister($src1$$reg),
13736 as_FloatRegister($src2$$reg));
13737 %}
13738 ins_pipe(pipe_class_default);
13739 %}
13740
13741 // --------------------------------- DIV --------------------------------------
13742
// Vector FP divide, 4 x single-precision lanes (FDIV Vd.4S, Vn.4S, Vm.4S).
// Only FP division is vectorized; integer division has no SIMD form.
13743 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
13744 %{
13745 match(Set dst (DivVF src1 src2));
13746 ins_cost(INSN_COST);
13747 format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %}
13748 ins_encode %{
13749 __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
13750 as_FloatRegister($src1$$reg),
13751 as_FloatRegister($src2$$reg));
13752 %}
13753 ins_pipe(pipe_class_default);
13754 %}
13755
// Vector FP divide, 2 x double-precision lanes (FDIV Vd.2D, Vn.2D, Vm.2D).
13756 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
13757 %{
13758 match(Set dst (DivVD src1 src2));
13759 ins_cost(INSN_COST);
13760 format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %}
13761 ins_encode %{
13762 __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
13763 as_FloatRegister($src1$$reg),
13764 as_FloatRegister($src2$$reg));
13765 %}
13766 ins_pipe(pipe_class_default);
13767 %}
13768
13769 // --------------------------------- AND --------------------------------------
13770
// Bitwise AND of two 128-bit vectors. AND is bit-parallel, so the 16B
// arrangement covers every element size; one rule handles all AndV nodes.
13771 instruct vand16B(vecX dst, vecX src1, vecX src2)
13772 %{
13773 match(Set dst (AndV src1 src2));
13774 ins_cost(INSN_COST);
13775 format %{ "and $dst,$src1,$src2\t# vector (16B)" %}
13776 ins_encode %{
13777 __ andr(as_FloatRegister($dst$$reg), __ T16B,
13778 as_FloatRegister($src1$$reg),
13779 as_FloatRegister($src2$$reg));
13780 %}
13781 ins_pipe(pipe_class_default);
13782 %}
13783
13784 // --------------------------------- OR ---------------------------------------
13785
// Bitwise OR of two 128-bit vectors; 16B arrangement serves all element
// sizes since OR is bit-parallel.
13786 instruct vor16B(vecX dst, vecX src1, vecX src2)
13787 %{
13788 match(Set dst (OrV src1 src2));
13789 ins_cost(INSN_COST);
13790 format %{ "orr $dst,$src1,$src2\t# vector (16B)" %}
13791 ins_encode %{
13792 __ orr(as_FloatRegister($dst$$reg), __ T16B,
13793 as_FloatRegister($src1$$reg),
13794 as_FloatRegister($src2$$reg));
13795 %}
13796 ins_pipe(pipe_class_default);
13797 %}
13798
13799 // --------------------------------- XOR --------------------------------------
13800
// Bitwise XOR (EOR) of two 128-bit vectors; 16B arrangement serves all
// element sizes since XOR is bit-parallel.
13801 instruct vxor16B(vecX dst, vecX src1, vecX src2)
13802 %{
13803 match(Set dst (XorV src1 src2));
13804 ins_cost(INSN_COST);
13805 format %{ "xor $dst,$src1,$src2\t# vector (16B)" %}
13806 ins_encode %{
13807 __ eor(as_FloatRegister($dst$$reg), __ T16B,
13808 as_FloatRegister($src1$$reg),
13809 as_FloatRegister($src2$$reg));
13810 %}
13811 ins_pipe(pipe_class_default);
13812 %}
13813
13814 // ------------------------------ Shift ---------------------------------------
13815
// Materialize a left-shift count: broadcast the scalar count into every
// byte lane of dst (DUP Vd.16B, Wn) for use by the variable-shift rules.
13816 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
13817 match(Set dst (LShiftCntV cnt));
13818 format %{ "dup $dst, $cnt\t# shift count (vecX)" %}
13819 ins_encode %{
13820 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13821 %}
13822 ins_pipe(pipe_class_default);
13823 %}
13824
13825 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count: broadcast the scalar count into every
// byte lane, then negate it, because SIMD right shifts are expressed as
// SSHL/USHL by a negative amount (see comment above).
13826 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
13827 match(Set dst (RShiftCntV cnt));
13828 format %{ "dup $dst, $cnt\t# shift count (vecX)\n\tneg $dst, $dst\t T16B" %}
13829 ins_encode %{
13830 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
13831 __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
13832 %}
13833 ins_pipe(pipe_class_default);
13834 %}
13835
// Variable shift, 16 x 8-bit lanes. SSHL handles both LShiftVB and
// RShiftVB: the shift vector is already negated for right shifts by
// vshiftcntR, and SSHL with a negative count shifts right (arithmetic).
13836 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
13837 match(Set dst (LShiftVB src shift));
13838 match(Set dst (RShiftVB src shift));
13839 ins_cost(INSN_COST);
13840 format %{ "sshl $dst,$src,$shift\t# vector (16B)" %}
13841 ins_encode %{
13842 __ sshl(as_FloatRegister($dst$$reg), __ T16B,
13843 as_FloatRegister($src$$reg),
13844 as_FloatRegister($shift$$reg));
13845 %}
13846 ins_pipe(pipe_class_default);
13847 %}
13848
// Variable logical right shift, 16 x 8-bit lanes: USHL with the negated
// count (from vshiftcntR) performs an unsigned right shift.
13849 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
13850 match(Set dst (URShiftVB src shift));
13851 ins_cost(INSN_COST);
13852 format %{ "ushl $dst,$src,$shift\t# vector (16B)" %}
13853 ins_encode %{
13854 __ ushl(as_FloatRegister($dst$$reg), __ T16B,
13855 as_FloatRegister($src$$reg),
13856 as_FloatRegister($shift$$reg));
13857 %}
13858 ins_pipe(pipe_class_default);
13859 %}
13860
// Immediate left shift, 16 x 8-bit lanes. Java masks int shifts to 5
// bits (& 31); a count >= the 8-bit element width shifts every bit out,
// which SHL cannot encode, so the result is materialized as zero with
// a self-EOR instead.
13861 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
13862 match(Set dst (LShiftVB src shift));
13863 ins_cost(INSN_COST);
13864 format %{ "shl $dst, $src, $shift\t# vector (16B)" %}
13865 ins_encode %{
13866 int sh = (int)$shift$$constant & 31;
13867 if (sh >= 8) {
13868 __ eor(as_FloatRegister($dst$$reg), __ T16B,
13869 as_FloatRegister($src$$reg),
13870 as_FloatRegister($src$$reg));
13871 } else {
13872 __ shl(as_FloatRegister($dst$$reg), __ T16B,
13873 as_FloatRegister($src$$reg), sh);
13874 }
13875 %}
13876 ins_pipe(pipe_class_default);
13877 %}
13878
// Immediate arithmetic right shift, 16 x 8-bit lanes. Counts >= 8 are
// clamped to 7 (arithmetic shift saturates at sign-fill, matching Java
// semantics). NOTE(review): the count is then negated and masked
// (-sh & 7) before being passed to sshr — this appears to match the
// assembler's immh:immb shift encoding; confirm against the sshr
// definition in assembler_aarch64.
13879 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
13880 match(Set dst (RShiftVB src shift));
13881 ins_cost(INSN_COST);
13882 format %{ "sshr $dst, $src, $shift\t# vector (16B)" %}
13883 ins_encode %{
13884 int sh = (int)$shift$$constant & 31;
13885 if (sh >= 8) sh = 7;
13886 sh = -sh & 7;
13887 __ sshr(as_FloatRegister($dst$$reg), __ T16B,
13888 as_FloatRegister($src$$reg), sh);
13889 %}
13890 ins_pipe(pipe_class_default);
13891 %}
13892
// Immediate logical right shift, 16 x 8-bit lanes. A count >= 8 shifts
// every bit out, so the result is zeroed with a self-EOR. Otherwise the
// count is negated and masked (-sh & 7) for ushr's shift encoding
// (NOTE(review): same encoding convention as vsra16B_imm — confirm
// against the assembler).
13893 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
13894 match(Set dst (URShiftVB src shift));
13895 ins_cost(INSN_COST);
13896 format %{ "ushr $dst, $src, $shift\t# vector (16B)" %}
13897 ins_encode %{
13898 int sh = (int)$shift$$constant & 31;
13899 if (sh >= 8) {
13900 __ eor(as_FloatRegister($dst$$reg), __ T16B,
13901 as_FloatRegister($src$$reg),
13902 as_FloatRegister($src$$reg));
13903 } else {
13904 __ ushr(as_FloatRegister($dst$$reg), __ T16B,
13905 as_FloatRegister($src$$reg), -sh & 7);
13906 }
13907 %}
13908 ins_pipe(pipe_class_default);
13909 %}
13910
// Variable shift, 8 x 16-bit lanes; SSHL serves both left and signed
// right shifts (right-shift counts arrive pre-negated via vshiftcntR).
13911 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
13912 match(Set dst (LShiftVS src shift));
13913 match(Set dst (RShiftVS src shift));
13914 ins_cost(INSN_COST);
13915 format %{ "sshl $dst,$src,$shift\t# vector (8H)" %}
13916 ins_encode %{
13917 __ sshl(as_FloatRegister($dst$$reg), __ T8H,
13918 as_FloatRegister($src$$reg),
13919 as_FloatRegister($shift$$reg));
13920 %}
13921 ins_pipe(pipe_class_default);
13922 %}
13923
// Variable logical right shift, 8 x 16-bit lanes: USHL with the negated
// count performs an unsigned right shift.
13924 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
13925 match(Set dst (URShiftVS src shift));
13926 ins_cost(INSN_COST);
13927 format %{ "ushl $dst,$src,$shift\t# vector (8H)" %}
13928 ins_encode %{
13929 __ ushl(as_FloatRegister($dst$$reg), __ T8H,
13930 as_FloatRegister($src$$reg),
13931 as_FloatRegister($shift$$reg));
13932 %}
13933 ins_pipe(pipe_class_default);
13934 %}
13935
// Immediate left shift, 8 x 16-bit lanes. A count >= the 16-bit element
// width zeroes the result (self-EOR); otherwise SHL encodes it directly.
13936 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
13937 match(Set dst (LShiftVS src shift));
13938 ins_cost(INSN_COST);
13939 format %{ "shl $dst, $src, $shift\t# vector (8H)" %}
13940 ins_encode %{
13941 int sh = (int)$shift$$constant & 31;
13942 if (sh >= 16) {
13943 __ eor(as_FloatRegister($dst$$reg), __ T16B,
13944 as_FloatRegister($src$$reg),
13945 as_FloatRegister($src$$reg));
13946 } else {
13947 __ shl(as_FloatRegister($dst$$reg), __ T8H,
13948 as_FloatRegister($src$$reg), sh);
13949 }
13950 %}
13951 ins_pipe(pipe_class_default);
13952 %}
13953
// Immediate arithmetic right shift, 8 x 16-bit lanes. Counts >= 16 are
// clamped to 15 (sign-fill saturation), then negated/masked (-sh & 15)
// for sshr's shift encoding (NOTE(review): confirm against the
// assembler's sshr definition).
13954 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
13955 match(Set dst (RShiftVS src shift));
13956 ins_cost(INSN_COST);
13957 format %{ "sshr $dst, $src, $shift\t# vector (8H)" %}
13958 ins_encode %{
13959 int sh = (int)$shift$$constant & 31;
13960 if (sh >= 16) sh = 15;
13961 sh = -sh & 15;
13962 __ sshr(as_FloatRegister($dst$$reg), __ T8H,
13963 as_FloatRegister($src$$reg), sh);
13964 %}
13965 ins_pipe(pipe_class_default);
13966 %}
13967
// Immediate logical right shift, 8 x 16-bit lanes. A count >= 16 zeroes
// the result (self-EOR); otherwise the count is negated/masked
// (-sh & 15) for ushr's shift encoding.
13968 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
13969 match(Set dst (URShiftVS src shift));
13970 ins_cost(INSN_COST);
13971 format %{ "ushr $dst, $src, $shift\t# vector (8H)" %}
13972 ins_encode %{
13973 int sh = (int)$shift$$constant & 31;
13974 if (sh >= 16) {
13975 __ eor(as_FloatRegister($dst$$reg), __ T16B,
13976 as_FloatRegister($src$$reg),
13977 as_FloatRegister($src$$reg));
13978 } else {
13979 __ ushr(as_FloatRegister($dst$$reg), __ T8H,
13980 as_FloatRegister($src$$reg), -sh & 15);
13981 }
13982 %}
13983 ins_pipe(pipe_class_default);
13984 %}
13985
// Variable shift, 4 x 32-bit lanes; SSHL serves both left and signed
// right shifts (right-shift counts arrive pre-negated via vshiftcntR).
13986 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
13987 match(Set dst (LShiftVI src shift));
13988 match(Set dst (RShiftVI src shift));
13989 ins_cost(INSN_COST);
13990 format %{ "sshl $dst,$src,$shift\t# vector (4S)" %}
13991 ins_encode %{
13992 __ sshl(as_FloatRegister($dst$$reg), __ T4S,
13993 as_FloatRegister($src$$reg),
13994 as_FloatRegister($shift$$reg));
13995 %}
13996 ins_pipe(pipe_class_default);
13997 %}
13998
// Variable logical right shift, 4 x 32-bit lanes via USHL with the
// negated count.
13999 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
14000 match(Set dst (URShiftVI src shift));
14001 ins_cost(INSN_COST);
14002 format %{ "ushl $dst,$src,$shift\t# vector (4S)" %}
14003 ins_encode %{
14004 __ ushl(as_FloatRegister($dst$$reg), __ T4S,
14005 as_FloatRegister($src$$reg),
14006 as_FloatRegister($shift$$reg));
14007 %}
14008 ins_pipe(pipe_class_default);
14009 %}
14010
// Immediate left shift, 4 x 32-bit lanes. Java's 5-bit mask (& 31) can
// never reach the 32-bit element width, so no zeroing special case is
// needed (unlike the 16B/8H rules).
14011 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
14012 match(Set dst (LShiftVI src shift));
14013 ins_cost(INSN_COST);
14014 format %{ "shl $dst, $src, $shift\t# vector (4S)" %}
14015 ins_encode %{
14016 __ shl(as_FloatRegister($dst$$reg), __ T4S,
14017 as_FloatRegister($src$$reg),
14018 (int)$shift$$constant & 31);
14019 %}
14020 ins_pipe(pipe_class_default);
14021 %}
14022
// Immediate arithmetic right shift, 4 x 32-bit lanes. The count is
// negated and masked to 5 bits for sshr's shift encoding
// (NOTE(review): same convention as the 16B/8H immediate rules —
// confirm against the assembler's sshr definition).
14023 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
14024 match(Set dst (RShiftVI src shift));
14025 ins_cost(INSN_COST);
14026 format %{ "sshr $dst, $src, $shift\t# vector (4S)" %}
14027 ins_encode %{
14028 __ sshr(as_FloatRegister($dst$$reg), __ T4S,
14029 as_FloatRegister($src$$reg),
14030 -(int)$shift$$constant & 31);
14031 %}
14032 ins_pipe(pipe_class_default);
14033 %}
14034
// Immediate logical right shift, 4 x 32-bit lanes; count negated and
// masked to 5 bits for ushr's shift encoding.
14035 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
14036 match(Set dst (URShiftVI src shift));
14037 ins_cost(INSN_COST);
14038 format %{ "ushr $dst, $src, $shift\t# vector (4S)" %}
14039 ins_encode %{
14040 __ ushr(as_FloatRegister($dst$$reg), __ T4S,
14041 as_FloatRegister($src$$reg),
14042 -(int)$shift$$constant & 31);
14043 %}
14044 ins_pipe(pipe_class_default);
14045 %}
14046
// Variable shift, 2 x 64-bit lanes; SSHL serves both left and signed
// right shifts (right-shift counts arrive pre-negated via vshiftcntR).
14047 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
14048 match(Set dst (LShiftVL src shift));
14049 match(Set dst (RShiftVL src shift));
14050 ins_cost(INSN_COST);
14051 format %{ "sshl $dst,$src,$shift\t# vector (2D)" %}
14052 ins_encode %{
14053 __ sshl(as_FloatRegister($dst$$reg), __ T2D,
14054 as_FloatRegister($src$$reg),
14055 as_FloatRegister($shift$$reg));
14056 %}
14057 ins_pipe(pipe_class_default);
14058 %}
14059
// Variable logical right shift, 2 x 64-bit lanes via USHL with the
// negated count.
14060 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
14061 match(Set dst (URShiftVL src shift));
14062 ins_cost(INSN_COST);
14063 format %{ "ushl $dst,$src,$shift\t# vector (2D)" %}
14064 ins_encode %{
14065 __ ushl(as_FloatRegister($dst$$reg), __ T2D,
14066 as_FloatRegister($src$$reg),
14067 as_FloatRegister($shift$$reg));
14068 %}
14069 ins_pipe(pipe_class_default);
14070 %}
14071
// Immediate left shift, 2 x 64-bit lanes. Java masks long shifts to
// 6 bits (& 63), which can never reach the 64-bit element width, so no
// zeroing special case is needed.
14072 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
14073 match(Set dst (LShiftVL src shift));
14074 ins_cost(INSN_COST);
14075 format %{ "shl $dst, $src, $shift\t# vector (2D)" %}
14076 ins_encode %{
14077 __ shl(as_FloatRegister($dst$$reg), __ T2D,
14078 as_FloatRegister($src$$reg),
14079 (int)$shift$$constant & 63);
14080 %}
14081 ins_pipe(pipe_class_default);
14082 %}
14083
// Immediate arithmetic right shift, 2 x 64-bit lanes; count negated and
// masked to 6 bits for sshr's shift encoding (NOTE(review): confirm
// against the assembler's sshr definition).
14084 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
14085 match(Set dst (RShiftVL src shift));
14086 ins_cost(INSN_COST);
14087 format %{ "sshr $dst, $src, $shift\t# vector (2D)" %}
14088 ins_encode %{
14089 __ sshr(as_FloatRegister($dst$$reg), __ T2D,
14090 as_FloatRegister($src$$reg),
14091 -(int)$shift$$constant & 63);
14092 %}
14093 ins_pipe(pipe_class_default);
14094 %}
14095
// Immediate logical right shift, 2 x 64-bit lanes; count negated and
// masked to 6 bits for ushr's shift encoding.
14096 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
14097 match(Set dst (URShiftVL src shift));
14098 ins_cost(INSN_COST);
14099 format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
14100 ins_encode %{
14101 __ ushr(as_FloatRegister($dst$$reg), __ T2D,
14102 as_FloatRegister($src$$reg),
14103 -(int)$shift$$constant & 63);
14104 %}
14105 ins_pipe(pipe_class_default);
14106 %}
14107
14108 //----------PEEPHOLE RULES-----------------------------------------------------
14109 // These must follow all instruction definitions as they use the names
14110 // defined in the instructions definitions.
14111 //
14112 // peepmatch ( root_instr_name [preceding_instruction]* );
14113 //
14114 // peepconstraint %{
14115 // (instruction_number.operand_name relational_op instruction_number.operand_name
14116 // [, ...] );
14117 // // instruction numbers are zero-based using left to right order in peepmatch
14118 //
14119 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
14120 // // provide an instruction_number.operand_name for each operand that appears
14121 // // in the replacement instruction's match rule
14122 //
14123 // ---------VM FLAGS---------------------------------------------------------
14124 //
14125 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14126 //
|