Webrev diff view — rev 1025: imported patch indy.compiler.patch
(page navigation chrome: Print / Split / Close / Expand all / Collapse all)
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 20 // CA 95054 USA or visit www.sun.com if you need additional information or
21 21 // have any questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // archtecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok so here's the trick FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instructions out of Intels FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source %{
// Relocation format shorthands for 32-bit immediates and displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional MacroAssembler shorthand used inside encodings.
#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// Long pairs are laid out two OptoReg slots apart.
#define HIGH_FROM_LOW(x) ((x)+2)
246 246
247 247 // These masks are used to provide 128-bit aligned bitmasks to the XMM
248 248 // instructions, to allow sign-masking or sign-bit flipping. They allow
249 249 // fast versions of NegF/NegD and AbsF/AbsD.
250 250
251 251 // Note: 'double' and 'long long' have 32-bits alignment on x86.
252 252 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
253 253 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
254 254 // of 128-bits operands for SSE instructions.
255 255 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
256 256 // Store the value to a 128-bits operand.
257 257 operand[0] = lo;
258 258 operand[1] = hi;
259 259 return operand;
260 260 }
[... 260 lines elided in this webrev view ...]
261 261
262 262 // Buffer for 128-bits masks used by SSE instructions.
263 263 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
264 264
265 265 // Static initialization during VM startup.
266 266 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
267 267 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
268 268 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
269 269 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
270 270
271 +// Offset hacking within calls.
272 +static int pre_call_FPU_size() {
273 + if (Compile::current()->in_24_bit_fp_mode())
274 + return 6; // fldcw
275 + return 0;
276 +}
277 +
278 +static int preserve_SP_size() {
279 + return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
280 +}
281 +
271 282 // !!!!! Special hack to get all type of calls to specify the byte offset
272 283 // from the start of the call to the point where the return address
273 284 // will point.
274 285 int MachCallStaticJavaNode::ret_addr_offset() {
275 - return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 5 bytes from start of call to where return address points
286 + int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
287 + if (_method_handle_invoke)
288 + offset += preserve_SP_size();
289 + return offset;
276 290 }
277 291
278 292 int MachCallDynamicJavaNode::ret_addr_offset() {
279 - return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 10 bytes from start of call to where return address points
293 + return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
280 294 }
281 295
282 296 static int sizeof_FFree_Float_Stack_All = -1;
283 297
284 298 int MachCallRuntimeNode::ret_addr_offset() {
285 299 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
286 - return sizeof_FFree_Float_Stack_All + 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);
300 + return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
287 301 }
288 302
289 303 // Indicate if the safepoint node needs the polling page as an input.
290 304 // Since x86 does have absolute addressing, it doesn't.
291 305 bool SafePointNode::needs_polling_address_input() {
292 306 return false;
293 307 }
294 308
295 309 //
296 310 // Compute padding required for nodes which need alignment
297 311 //
298 312
299 313 // The address of the call instruction needs to be 4-byte aligned to
300 314 // ensure that it does not span a cache line so that it can be patched.
301 315 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
302 - if (Compile::current()->in_24_bit_fp_mode())
303 - current_offset += 6; // skip fldcw in pre_call_FPU, if any
316 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
317 + current_offset += 1; // skip call opcode byte
318 + return round_to(current_offset, alignment_required()) - current_offset;
319 +}
320 +
321 +// The address of the call instruction needs to be 4-byte aligned to
322 +// ensure that it does not span a cache line so that it can be patched.
323 +int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
324 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
325 + current_offset += preserve_SP_size(); // skip mov rbp, rsp
304 326 current_offset += 1; // skip call opcode byte
305 327 return round_to(current_offset, alignment_required()) - current_offset;
306 328 }
307 329
308 330 // The address of the call instruction needs to be 4-byte aligned to
309 331 // ensure that it does not span a cache line so that it can be patched.
310 332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
311 - if (Compile::current()->in_24_bit_fp_mode())
312 - current_offset += 6; // skip fldcw in pre_call_FPU, if any
333 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
313 334 current_offset += 5; // skip MOV instruction
314 335 current_offset += 1; // skip call opcode byte
315 336 return round_to(current_offset, alignment_required()) - current_offset;
316 337 }
317 338
318 339 #ifndef PRODUCT
319 340 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
320 341 st->print("INT3");
321 342 }
322 343 #endif
323 344
324 345 // EMIT_RM()
325 346 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
326 347 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
327 348 *(cbuf.code_end()) = c;
328 349 cbuf.set_code_end(cbuf.code_end() + 1);
329 350 }
330 351
331 352 // EMIT_CC()
332 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
333 354 unsigned char c = (unsigned char)( f1 | f2 );
334 355 *(cbuf.code_end()) = c;
335 356 cbuf.set_code_end(cbuf.code_end() + 1);
336 357 }
337 358
338 359 // EMIT_OPCODE()
339 360 void emit_opcode(CodeBuffer &cbuf, int code) {
340 361 *(cbuf.code_end()) = (unsigned char)code;
341 362 cbuf.set_code_end(cbuf.code_end() + 1);
342 363 }
343 364
344 365 // EMIT_OPCODE() w/ relocation information
345 366 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
346 367 cbuf.relocate(cbuf.inst_mark() + offset, reloc);
347 368 emit_opcode(cbuf, code);
348 369 }
349 370
350 371 // EMIT_D8()
351 372 void emit_d8(CodeBuffer &cbuf, int d8) {
352 373 *(cbuf.code_end()) = (unsigned char)d8;
353 374 cbuf.set_code_end(cbuf.code_end() + 1);
354 375 }
355 376
356 377 // EMIT_D16()
357 378 void emit_d16(CodeBuffer &cbuf, int d16) {
358 379 *((short *)(cbuf.code_end())) = d16;
359 380 cbuf.set_code_end(cbuf.code_end() + 2);
360 381 }
361 382
362 383 // EMIT_D32()
363 384 void emit_d32(CodeBuffer &cbuf, int d32) {
364 385 *((int *)(cbuf.code_end())) = d32;
365 386 cbuf.set_code_end(cbuf.code_end() + 4);
366 387 }
367 388
368 389 // emit 32 bit value and construct relocation entry from relocInfo::relocType
369 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
370 391 int format) {
371 392 cbuf.relocate(cbuf.inst_mark(), reloc, format);
372 393
373 394 *((int *)(cbuf.code_end())) = d32;
374 395 cbuf.set_code_end(cbuf.code_end() + 4);
375 396 }
376 397
377 398 // emit 32 bit value and construct relocation entry from RelocationHolder
378 399 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
379 400 int format) {
380 401 #ifdef ASSERT
381 402 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
382 403 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
383 404 }
384 405 #endif
385 406 cbuf.relocate(cbuf.inst_mark(), rspec, format);
386 407
387 408 *((int *)(cbuf.code_end())) = d32;
388 409 cbuf.set_code_end(cbuf.code_end() + 4);
389 410 }
390 411
391 412 // Access stack slot for load or store
392 413 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
393 414 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
394 415 if( -128 <= disp && disp <= 127 ) {
395 416 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
396 417 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
397 418 emit_d8 (cbuf, disp); // Displacement // R/M byte
398 419 } else {
399 420 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
400 421 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
401 422 emit_d32(cbuf, disp); // Displacement // R/M byte
402 423 }
403 424 }
404 425
405 426 // eRegI ereg, memory mem) %{ // emit_reg_mem
406 427 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
407 428 // There is no index & no scale, use form without SIB byte
408 429 if ((index == 0x4) &&
409 430 (scale == 0) && (base != ESP_enc)) {
410 431 // If no displacement, mode is 0x0; unless base is [EBP]
411 432 if ( (displace == 0) && (base != EBP_enc) ) {
412 433 emit_rm(cbuf, 0x0, reg_encoding, base);
413 434 }
414 435 else { // If 8-bit displacement, mode 0x1
415 436 if ((displace >= -128) && (displace <= 127)
416 437 && !(displace_is_oop) ) {
417 438 emit_rm(cbuf, 0x1, reg_encoding, base);
418 439 emit_d8(cbuf, displace);
419 440 }
420 441 else { // If 32-bit displacement
421 442 if (base == -1) { // Special flag for absolute address
422 443 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
423 444 // (manual lies; no SIB needed here)
424 445 if ( displace_is_oop ) {
425 446 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
426 447 } else {
427 448 emit_d32 (cbuf, displace);
428 449 }
429 450 }
430 451 else { // Normal base + offset
431 452 emit_rm(cbuf, 0x2, reg_encoding, base);
432 453 if ( displace_is_oop ) {
433 454 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
434 455 } else {
435 456 emit_d32 (cbuf, displace);
436 457 }
437 458 }
438 459 }
439 460 }
440 461 }
441 462 else { // Else, encode with the SIB byte
442 463 // If no displacement, mode is 0x0; unless base is [EBP]
443 464 if (displace == 0 && (base != EBP_enc)) { // If no displacement
444 465 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
445 466 emit_rm(cbuf, scale, index, base);
446 467 }
447 468 else { // If 8-bit displacement, mode 0x1
448 469 if ((displace >= -128) && (displace <= 127)
449 470 && !(displace_is_oop) ) {
450 471 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
451 472 emit_rm(cbuf, scale, index, base);
452 473 emit_d8(cbuf, displace);
453 474 }
454 475 else { // If 32-bit displacement
455 476 if (base == 0x04 ) {
456 477 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
457 478 emit_rm(cbuf, scale, index, 0x04);
458 479 } else {
459 480 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
460 481 emit_rm(cbuf, scale, index, base);
461 482 }
462 483 if ( displace_is_oop ) {
463 484 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
464 485 } else {
465 486 emit_d32 (cbuf, displace);
466 487 }
467 488 }
468 489 }
469 490 }
470 491 }
471 492
472 493
473 494 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
474 495 if( dst_encoding == src_encoding ) {
475 496 // reg-reg copy, use an empty encoding
476 497 } else {
477 498 emit_opcode( cbuf, 0x8B );
478 499 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
479 500 }
480 501 }
481 502
482 503 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
483 504 if( dst_encoding == src_encoding ) {
484 505 // reg-reg copy, use an empty encoding
485 506 } else {
486 507 MacroAssembler _masm(&cbuf);
487 508
488 509 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
489 510 }
490 511 }
491 512
492 513
493 514 //=============================================================================
494 515 #ifndef PRODUCT
495 516 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
496 517 Compile* C = ra_->C;
497 518 if( C->in_24_bit_fp_mode() ) {
498 519 st->print("FLDCW 24 bit fpu control word");
499 520 st->print_cr(""); st->print("\t");
500 521 }
501 522
502 523 int framesize = C->frame_slots() << LogBytesPerInt;
503 524 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
504 525 // Remove two words for return addr and rbp,
505 526 framesize -= 2*wordSize;
506 527
507 528 // Calls to C2R adapters often do not accept exceptional returns.
508 529 // We require that their callers must bang for them. But be careful, because
509 530 // some VM calls (such as call site linkage) can use several kilobytes of
510 531 // stack. But the stack safety zone should account for that.
511 532 // See bugs 4446381, 4468289, 4497237.
512 533 if (C->need_stack_bang(framesize)) {
513 534 st->print_cr("# stack bang"); st->print("\t");
514 535 }
515 536 st->print_cr("PUSHL EBP"); st->print("\t");
516 537
517 538 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
518 539 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
519 540 st->print_cr(""); st->print("\t");
520 541 framesize -= wordSize;
521 542 }
522 543
523 544 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
524 545 if (framesize) {
525 546 st->print("SUB ESP,%d\t# Create frame",framesize);
526 547 }
527 548 } else {
528 549 st->print("SUB ESP,%d\t# Create frame",framesize);
529 550 }
530 551 }
531 552 #endif
532 553
533 554
534 555 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
535 556 Compile* C = ra_->C;
536 557
537 558 if (UseSSE >= 2 && VerifyFPU) {
538 559 MacroAssembler masm(&cbuf);
539 560 masm.verify_FPU(0, "FPU stack must be clean on entry");
540 561 }
541 562
542 563 // WARNING: Initial instruction MUST be 5 bytes or longer so that
543 564 // NativeJump::patch_verified_entry will be able to patch out the entry
544 565 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
545 566 // depth is ok at 5 bytes, the frame allocation can be either 3 or
546 567 // 6 bytes. So if we don't do the fldcw or the push then we must
547 568 // use the 6 byte frame allocation even if we have no frame. :-(
548 569 // If method sets FPU control word do it now
549 570 if( C->in_24_bit_fp_mode() ) {
550 571 MacroAssembler masm(&cbuf);
551 572 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
552 573 }
553 574
554 575 int framesize = C->frame_slots() << LogBytesPerInt;
555 576 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
556 577 // Remove two words for return addr and rbp,
557 578 framesize -= 2*wordSize;
558 579
559 580 // Calls to C2R adapters often do not accept exceptional returns.
560 581 // We require that their callers must bang for them. But be careful, because
561 582 // some VM calls (such as call site linkage) can use several kilobytes of
562 583 // stack. But the stack safety zone should account for that.
563 584 // See bugs 4446381, 4468289, 4497237.
564 585 if (C->need_stack_bang(framesize)) {
565 586 MacroAssembler masm(&cbuf);
566 587 masm.generate_stack_overflow_check(framesize);
567 588 }
568 589
569 590 // We always push rbp, so that on return to interpreter rbp, will be
570 591 // restored correctly and we can correct the stack.
571 592 emit_opcode(cbuf, 0x50 | EBP_enc);
572 593
573 594 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
574 595 emit_opcode(cbuf, 0x68); // push 0xbadb100d
575 596 emit_d32(cbuf, 0xbadb100d);
576 597 framesize -= wordSize;
577 598 }
578 599
579 600 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
580 601 if (framesize) {
581 602 emit_opcode(cbuf, 0x83); // sub SP,#framesize
582 603 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
583 604 emit_d8(cbuf, framesize);
584 605 }
585 606 } else {
586 607 emit_opcode(cbuf, 0x81); // sub SP,#framesize
587 608 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
588 609 emit_d32(cbuf, framesize);
589 610 }
590 611 C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
591 612
592 613 #ifdef ASSERT
593 614 if (VerifyStackAtCalls) {
594 615 Label L;
595 616 MacroAssembler masm(&cbuf);
596 617 masm.push(rax);
597 618 masm.mov(rax, rsp);
598 619 masm.andptr(rax, StackAlignmentInBytes-1);
599 620 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
600 621 masm.pop(rax);
601 622 masm.jcc(Assembler::equal, L);
602 623 masm.stop("Stack is not properly aligned!");
603 624 masm.bind(L);
604 625 }
605 626 #endif
606 627
607 628 }
608 629
609 630 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
610 631 return MachNode::size(ra_); // too many variables; just compute it the hard way
611 632 }
612 633
613 634 int MachPrologNode::reloc() const {
614 635 return 0; // a large enough number
615 636 }
616 637
617 638 //=============================================================================
618 639 #ifndef PRODUCT
619 640 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
620 641 Compile *C = ra_->C;
621 642 int framesize = C->frame_slots() << LogBytesPerInt;
622 643 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
623 644 // Remove two words for return addr and rbp,
624 645 framesize -= 2*wordSize;
625 646
626 647 if( C->in_24_bit_fp_mode() ) {
627 648 st->print("FLDCW standard control word");
628 649 st->cr(); st->print("\t");
629 650 }
630 651 if( framesize ) {
631 652 st->print("ADD ESP,%d\t# Destroy frame",framesize);
632 653 st->cr(); st->print("\t");
633 654 }
634 655 st->print_cr("POPL EBP"); st->print("\t");
635 656 if( do_polling() && C->is_method_compilation() ) {
636 657 st->print("TEST PollPage,EAX\t! Poll Safepoint");
637 658 st->cr(); st->print("\t");
638 659 }
639 660 }
640 661 #endif
641 662
642 663 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
643 664 Compile *C = ra_->C;
644 665
645 666 // If method set FPU control word, restore to standard control word
646 667 if( C->in_24_bit_fp_mode() ) {
647 668 MacroAssembler masm(&cbuf);
648 669 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
649 670 }
650 671
651 672 int framesize = C->frame_slots() << LogBytesPerInt;
652 673 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
653 674 // Remove two words for return addr and rbp,
654 675 framesize -= 2*wordSize;
655 676
656 677 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
657 678
658 679 if( framesize >= 128 ) {
659 680 emit_opcode(cbuf, 0x81); // add SP, #framesize
660 681 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
661 682 emit_d32(cbuf, framesize);
662 683 }
663 684 else if( framesize ) {
664 685 emit_opcode(cbuf, 0x83); // add SP, #framesize
665 686 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
666 687 emit_d8(cbuf, framesize);
667 688 }
668 689
669 690 emit_opcode(cbuf, 0x58 | EBP_enc);
670 691
671 692 if( do_polling() && C->is_method_compilation() ) {
672 693 cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
673 694 emit_opcode(cbuf,0x85);
674 695 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
675 696 emit_d32(cbuf, (intptr_t)os::get_polling_page());
676 697 }
677 698 }
678 699
// Exact byte size of the epilog; must agree instruction-for-instruction with
// MachEpilogNode::emit above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;   // FLDCW [mem32] = 6 bytes
  if( do_polling() && C->is_method_compilation() ) size += 6; // TEST EAX,[disp32] = 6 bytes

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6;                   // ADD ESP,imm32
  } else {
    size += framesize ? 3 : 0;   // ADD ESP,imm8, or nothing for an empty frame
  }
  return size;
}
699 720
// Conservative count of relocation entries needed by the epilog.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
703 724
// The epilog uses the generic (default) pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
707 728
708 729 int MachEpilogNode::safepoint_offset() const { return 0; }
709 730
710 731 //=============================================================================
711 732
712 733 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
713 734 static enum RC rc_class( OptoReg::Name reg ) {
714 735
715 736 if( !OptoReg::is_valid(reg) ) return rc_bad;
716 737 if (OptoReg::is_stack(reg)) return rc_stack;
717 738
718 739 VMReg r = OptoReg::as_VMReg(reg);
719 740 if (r->is_Register()) return rc_int;
720 741 if (r->is_FloatRegister()) {
721 742 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
722 743 return rc_float;
723 744 }
724 745 assert(r->is_XMMRegister(), "must be");
725 746 return rc_xmm;
726 747 }
727 748
// Emit, size, or pretty-print one reg<->[ESP+offset] instruction.
// With cbuf != NULL the bytes are emitted; with cbuf == NULL and !do_size a
// disassembly line is printed to 'st'.  Always returns the accumulated byte
// size ('size' plus this instruction's length).
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    // ESP-based address: ModRM with SIB byte, base = ESP, no index.
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModRM + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
747 768
748 769 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emits/sizes/prints one XMM<->[ESP+offset] move: MOVSS for a single slot,
// MOVSD for an adjacent pair (or MOVLPD on load when !UseXmmLoadAndClearUpper,
// which leaves the upper half of the destination untouched).
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );   // single slot: 'movss'
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0x0F + opcode + ModRM + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
786 807
787 808
// Emit/size/print an XMM-to-XMM register move.  With UseXmmRegToRegMoveAll the
// full-register MOVAPS/MOVAPD (0x0F 0x28) forms are used; otherwise the
// scalar MOVSS/MOVSD (prefix 0xF3/0xF2, opcode 0x10) forms.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 ); // 0x66 prefix selects the MOVAPD form
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size+4; // prefix + 0x0F + opcode + ModRM
  }
}
828 849
// Emit/size/print an integer register-to-register move (MOV dst,src).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + ModRM
}
841 862
// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0) it is first FLDed to the top of the stack and stored with a pop
// (FSTP); otherwise a plain FST leaves the FP stack unchanged.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" passed to impl_helper supplies the ModRM /digit field,
  // selecting store-and-pop (FSTP) vs. plain store (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
871 892
872 893 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
873 894 // Get registers to move
874 895 OptoReg::Name src_second = ra_->get_reg_second(in(1));
875 896 OptoReg::Name src_first = ra_->get_reg_first(in(1));
876 897 OptoReg::Name dst_second = ra_->get_reg_second(this );
877 898 OptoReg::Name dst_first = ra_->get_reg_first(this );
878 899
879 900 enum RC src_second_rc = rc_class(src_second);
880 901 enum RC src_first_rc = rc_class(src_first);
881 902 enum RC dst_second_rc = rc_class(dst_second);
882 903 enum RC dst_first_rc = rc_class(dst_first);
883 904
884 905 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
885 906
886 907 // Generate spill code!
887 908 int size = 0;
888 909
889 910 if( src_first == dst_first && src_second == dst_second )
890 911 return size; // Self copy, no move
891 912
892 913 // --------------------------------------
893 914 // Check for mem-mem move. push/pop to move.
894 915 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
895 916 if( src_second == dst_first ) { // overlapping stack copy ranges
896 917 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
897 918 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
898 919 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
899 920 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
900 921 }
901 922 // move low bits
902 923 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
903 924 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
904 925 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
905 926 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
906 927 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
907 928 }
908 929 return size;
909 930 }
910 931
911 932 // --------------------------------------
912 933 // Check for integer reg-reg copy
913 934 if( src_first_rc == rc_int && dst_first_rc == rc_int )
914 935 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
915 936
916 937 // Check for integer store
917 938 if( src_first_rc == rc_int && dst_first_rc == rc_stack )
918 939 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
919 940
920 941 // Check for integer load
921 942 if( dst_first_rc == rc_int && src_first_rc == rc_stack )
922 943 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
923 944
924 945 // --------------------------------------
925 946 // Check for float reg-reg copy
926 947 if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
927 948 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
928 949 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
929 950 if( cbuf ) {
930 951
931 952 // Note the mucking with the register encode to compensate for the 0/1
932 953 // indexing issue mentioned in a comment in the reg_def sections
933 954 // for FPR registers many lines above here.
934 955
935 956 if( src_first != FPR1L_num ) {
936 957 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
937 958 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
938 959 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
939 960 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
940 961 } else {
941 962 emit_opcode (*cbuf, 0xDD ); // FST ST(i)
942 963 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
943 964 }
944 965 #ifndef PRODUCT
945 966 } else if( !do_size ) {
946 967 if( size != 0 ) st->print("\n\t");
947 968 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
948 969 else st->print( "FST %s", Matcher::regName[dst_first]);
949 970 #endif
950 971 }
951 972 return size + ((src_first != FPR1L_num) ? 2+2 : 2);
952 973 }
953 974
954 975 // Check for float store
955 976 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
956 977 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
957 978 }
958 979
959 980 // Check for float load
960 981 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
961 982 int offset = ra_->reg2offset(src_first);
962 983 const char *op_str;
963 984 int op;
964 985 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
965 986 op_str = "FLD_D";
966 987 op = 0xDD;
967 988 } else { // 32-bit load
968 989 op_str = "FLD_S";
969 990 op = 0xD9;
970 991 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
971 992 }
972 993 if( cbuf ) {
973 994 emit_opcode (*cbuf, op );
974 995 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
975 996 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
976 997 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
977 998 #ifndef PRODUCT
978 999 } else if( !do_size ) {
979 1000 if( size != 0 ) st->print("\n\t");
980 1001 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
981 1002 #endif
982 1003 }
983 1004 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
984 1005 return size + 3+offset_size+2;
985 1006 }
986 1007
987 1008 // Check for xmm reg-reg copy
988 1009 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
989 1010 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
990 1011 (src_first+1 == src_second && dst_first+1 == dst_second),
991 1012 "no non-adjacent float-moves" );
992 1013 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
993 1014 }
994 1015
995 1016 // Check for xmm store
996 1017 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
997 1018 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
998 1019 }
999 1020
1000 1021 // Check for float xmm load
1001 1022 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1002 1023 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1003 1024 }
1004 1025
1005 1026 // Copy from float reg to xmm reg
1006 1027 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1007 1028 // copy to the top of stack from floating point reg
1008 1029 // and use LEA to preserve flags
1009 1030 if( cbuf ) {
1010 1031 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
1011 1032 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1012 1033 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1013 1034 emit_d8(*cbuf,0xF8);
1014 1035 #ifndef PRODUCT
1015 1036 } else if( !do_size ) {
1016 1037 if( size != 0 ) st->print("\n\t");
1017 1038 st->print("LEA ESP,[ESP-8]");
1018 1039 #endif
1019 1040 }
1020 1041 size += 4;
1021 1042
1022 1043 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1023 1044
1024 1045 // Copy from the temp memory to the xmm reg.
1025 1046 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1026 1047
1027 1048 if( cbuf ) {
1028 1049 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
1029 1050 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1030 1051 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1031 1052 emit_d8(*cbuf,0x08);
1032 1053 #ifndef PRODUCT
1033 1054 } else if( !do_size ) {
1034 1055 if( size != 0 ) st->print("\n\t");
1035 1056 st->print("LEA ESP,[ESP+8]");
1036 1057 #endif
1037 1058 }
1038 1059 size += 4;
1039 1060 return size;
1040 1061 }
1041 1062
1042 1063 assert( size > 0, "missed a case" );
1043 1064
1044 1065 // --------------------------------------------------------------------
1045 1066 // Check for second bits still needing moving.
1046 1067 if( src_second == dst_second )
1047 1068 return size; // Self copy; no move
1048 1069 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1049 1070
1050 1071 // Check for second word int-int move
1051 1072 if( src_second_rc == rc_int && dst_second_rc == rc_int )
1052 1073 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1053 1074
1054 1075 // Check for second word integer store
1055 1076 if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1056 1077 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1057 1078
1058 1079 // Check for second word integer load
1059 1080 if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1060 1081 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1061 1082
1062 1083
1063 1084 Unimplemented();
1064 1085 }
1065 1086
#ifndef PRODUCT
// Debug listing: run the shared implementation in print mode (no CodeBuffer).
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif
1071 1092
// Emit the spill copy via the shared implementation.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1075 1096
// Byte size of the spill copy: run the shared implementation in sizing mode.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1079 1100
1080 1101 //=============================================================================
#ifndef PRODUCT
// Debug listing of a padding nop sequence.
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif
1086 1107
// Emit '_count' bytes of nop padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1091 1112
// Size of the pad in bytes (one byte per nop).
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1095 1116
1096 1117
1097 1118 //=============================================================================
#ifndef PRODUCT
// Debug listing: a BoxLock materializes the address of its stack slot.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1105 1126
// Emit LEA reg,[ESP+offset]: materialize the address of the lock's stack slot.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    // 32-bit displacement form (7 bytes).
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    // 8-bit displacement form (4 bytes).
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}
1122 1143
1123 1144 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1124 1145 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1125 1146 if( offset >= 128 ) {
1126 1147 return 7;
1127 1148 }
1128 1149 else {
1129 1150 return 4;
1130 1151 }
1131 1152 }
1132 1153
1133 1154 //=============================================================================
1134 1155
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.  Its shape is:
  //   mov rbx,0   ; methodOop, zapped until fixup
  //   jmp -1      ; unresolved target

  address mark = cbuf.inst_mark(); // get mark within main instrs section

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore code_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1169 1190
1170 1191 //=============================================================================
#ifndef PRODUCT
// Debug listing of the unverified entry point (inline cache check).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1181 1202
// Emit the unverified entry point: compare the inline-cache klass (EAX)
// against the receiver's klass (loaded via ECX) and jump to the IC-miss
// stub on mismatch; then pad with nops.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint code_size = cbuf.code_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
}
1199 1220
1200 1221 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1201 1222 return OptoBreakpoint ? 11 : 12;
1202 1223 }
1203 1224
1204 1225
1205 1226 //=============================================================================
// Size reserved for the exception handler stub.
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1214 1235
// Emit exception handler code. Stuff framesize into a register
// and call a VM stub routine.  Returns the handler's offset within the
// stubs section (0 if the CodeBuffer could not be expanded).
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1231 1252
// Size reserved for the deopt handler stub.
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The deopt handler is 'pushl <re-exec address>; jmp <unpack>', so it
  // needs 5 bytes for the push in addition to the jump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1240 1261
1241 1262 // Emit deopt handler code.
1242 1263 int emit_deopt_handler(CodeBuffer& cbuf) {
1243 1264
1244 1265 // Note that the code buffer's inst_mark is always relative to insts.
1245 1266 // That's why we must use the macroassembler to generate a handler.
1246 1267 MacroAssembler _masm(&cbuf);
1247 1268 address base =
1248 1269 __ start_a_stub(size_exception_handler());
1249 1270 if (base == NULL) return 0; // CodeBuffer::expand failed
1250 1271 int offset = __ offset();
1251 1272 InternalAddress here(__ pc());
1252 1273 __ pushptr(here.addr());
1253 1274
1254 1275 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1255 1276 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1256 1277 __ end_a_stub();
1257 1278 return offset;
1258 1279 }
1259 1280
1260 1281
// Materialize 'x' via the assembler's double-constant pool and emit a 32-bit
// displacement referencing it, with an internal-word relocation so the
// reference survives code movement.
static void emit_double_constant(CodeBuffer& cbuf, double x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address double_address = __ double_constant(x);
  cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)double_address,
                 internal_word_Relocation::spec(double_address),
                 RELOC_DISP32);
}
1271 1292
// Float analogue of emit_double_constant: materialize 'x' in the constant
// pool and emit a relocated 32-bit displacement to it.
static void emit_float_constant(CodeBuffer& cbuf, float x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address float_address = __ float_constant(x);
  cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)float_address,
                 internal_word_Relocation::spec(float_address),
                 RELOC_DISP32);
}
1282 1303
1283 1304
1284 1305 const bool Matcher::match_rule_supported(int opcode) {
1285 1306 if (!has_match_rule(opcode))
1286 1307 return false;
1287 1308
1288 1309 return true; // Per default match rules are supported.
1289 1310 }
1290 1311
// Map an allocator register number to an FPU-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1294 1315
1295 1316 bool is_positive_zero_float(jfloat f) {
1296 1317 return jint_cast(f) == jint_cast(0.0F);
1297 1318 }
1298 1319
1299 1320 bool is_positive_one_float(jfloat f) {
1300 1321 return jint_cast(f) == jint_cast(1.0F);
1301 1322 }
1302 1323
1303 1324 bool is_positive_zero_double(jdouble d) {
1304 1325 return jlong_cast(d) == jlong_cast(0.0);
1305 1326 }
1306 1327
1307 1328 bool is_positive_one_double(jdouble d) {
1308 1329 return jlong_cast(d) == jlong_cast(1.0);
1309 1330 }
1310 1331
// True just means we have fast l2f conversion.  (The "UltraSparc specific"
// wording found in other ports is boilerplate; x86 simply answers true.)
const bool Matcher::convL2FSupported(void) {
  return true;
}
1315 1336
// Vector width in bytes: 8-byte vectors when SSE2 is available, otherwise no
// vector support.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}
1320 1341
// Vector ideal reg: an 8-byte vector occupies a double register.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1325 1346
1326 1347 // Is this branch offset short enough that a short branch can be used?
1327 1348 //
1328 1349 // NOTE: If the platform does not provide any short branch variants, then
1329 1350 // this method should return false for offset 0.
1330 1351 bool Matcher::is_short_branch_offset(int rule, int offset) {
1331 1352 // the short version of jmpConUCF2 contains multiple branches,
1332 1353 // making the reach slightly less
1333 1354 if (rule == jmpConUCF2_rule)
1334 1355 return (-126 <= offset && offset <= 125);
1335 1356 return (-128 <= offset && offset <= 127);
1336 1357 }
1337 1358
// On 32-bit x86 a 64-bit constant store is never "simple".
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1342 1363
// ---- Platform characterization constants consumed by the matcher. ----

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1365 1386
1366 1387
// Replace the memory operand that owns input leaf 'idx' with its
// "win95 safe" variant, which restricts the registers the addressing mode
// may use.  NOTE(review): presumably required because some addressing forms
// can fault outside the guarded page on win95-family OSes — confirm against
// the *_win95_safeOper definitions.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node: walk the operand list summing each
  // operand's edge count until the one covering input 'idx' is found.
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1416 1437
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode (the x87 unit computes with extra
// precision, hence 'true' here).
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1425 1446
1426 1447 // Return whether or not this register is ever used as an argument. This
1427 1448 // function is used on startup to build the trampoline stubs in generateOptoStub.
1428 1449 // Registers not mentioned will be killed by the VM call in the trampoline, and
1429 1450 // arguments in those registers not be available to the callee.
1430 1451 bool Matcher::can_be_java_arg( int reg ) {
1431 1452 if( reg == ECX_num || reg == EDX_num ) return true;
1432 1453 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1433 1454 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1434 1455 return false;
1435 1456 }
1436 1457
// A register is a spillable argument exactly when it can carry a Java
// argument (delegates to can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1440 1461
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;   // x86 IDIV leaves the quotient in EAX
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;   // x86 IDIV leaves the remainder in EDX
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // A fused long divmod is never matched on ia32, so this mask is
  // never requested.
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  // See divL_proj_mask: long divmod is not matched on this platform.
  ShouldNotReachHere();
  return RegMask();
}
1462 1483
// Register used to preserve SP across MethodHandle invokes.
// EBP is callee-saved across all calls (see the preserve_SP/restore_SP
// encodings in this file), so it can hold the caller's SP while a
// method handle call site may modify the stack pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1463 1488 %}
1464 1489
1465 1490 //----------ENCODING BLOCK-----------------------------------------------------
1466 1491 // This block specifies the encoding classes used by the compiler to output
1467 1492 // byte streams. Encoding classes generate functions which are called by
1468 1493 // Machine Instruction Nodes in order to generate the bit encoding of the
1469 1494 // instruction. Operands specify their base encoding interface with the
1470 1495 // interface keyword. There are currently supported four interfaces,
1471 1496 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1472 1497 // operand to generate a function which returns its register number when
1473 1498 // queried. CONST_INTER causes an operand to generate a function which
1474 1499 // returns the value of the constant when queried. MEMORY_INTER causes an
1475 1500 // operand to generate four functions which return the Base Register, the
1476 1501 // Index Register, the Scale Value, and the Offset Value of the operand when
1477 1502 // queried. COND_INTER causes an operand to generate six functions which
1478 1503 // return the encoding code (ie - encoding bits for the instruction)
1479 1504 // associated with each basic boolean condition for a conditional instruction.
1480 1505 // Instructions specify two basic values for encoding. They use the
1481 1506 // ins_encode keyword to specify their encoding class (which must be one of
1482 1507 // the class names specified in the encoding block), and they use the
1483 1508 // opcode keyword to specify, in order, their primary, secondary, and
1484 1509 // tertiary opcode. Only the opcode sections which a particular instruction
1485 1510 // needs for encoding need to be specified.
1486 1511 encode %{
1487 1512 // Build emit functions for each basic byte or larger field in the intel
1488 1513 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1489 1514 // code in the enc_class source block. Emit functions will live in the
1490 1515 // main source block for now. In future, we can generalize this by
1491 1516 // adding a syntax that specifies the sizes of fields in an order,
1492 1517 // so that the adlc can build the emit functions automagically
1493 1518
  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // mod/rm byte for a register-register form (mod == 0x3)
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode byte followed by a register-register mod/rm byte
  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 -- load immediate zero into a register
  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}
1526 1551
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                           -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1        ; divisor == -1 ?
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    // NOTE(review): 83 F9 FF is cmp ecx with the sign-extended imm8 0xFF,
    // i.e. -1 — the min_int / -1 overflow special case of the JVM spec.
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1567 1592
  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Same as OpcSE, plus an r/m byte using the secondary opcode as the
  // reg/opcode selector field
  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself: one byte if it fits in a signed byte,
  // otherwise four bytes
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long op-with-immediate: opcode + r/m + imm8/imm32
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long op-with-immediate: uses the tertiary opcode
  // selector and the high register of the pair
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1629 1654
  // 32-bit displacement to a label (JMP/CALL rel32); emits 0 when the
  // label is not yet bound (patched later)
  enc_class Lbl (label labl) %{    // JMP, CALL
    Label *l = $labl$$label;
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // 8-bit displacement to a label (short JMP); must fit in a signed byte
  enc_class LblShort (label labl) %{    // JMP, CALL
    Label *l = $labl$$label;
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Opcode with the register encoded in its low 3 bits (e.g. BSWAP r32)
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a long: BSWAP each 32-bit half, then XCHG the halves
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Single register operand selected via the secondary opcode field
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // Conditional jump with 32-bit displacement
  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // Conditional jump with 8-bit displacement
  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}
1678 1703
  // CMOVcc: primary opcode byte, then condition folded into second byte
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOVcc: condition code and FP stack slot folded into one
  // two-byte opcode starting at 0xDA00
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path portion of a partial subtype check; delegates to
  // MacroAssembler::check_klass_subtype_slow_path with fixed registers.
  // When $primary is set, EDI is zeroed before the miss label —
  // NOTE(review): confirm the intended result convention against the
  // instruct rules that use this encoding.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1713 1738
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the size of this sequence the first time it is emitted, and
    // assert every later emission has the same size (instruction sizing
    // must be stable).
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optionally (-XX:+VerifyFPU) check the x87 stack state when returning
  // from a runtime leaf call
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1739 1764
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      // C runtime calls return FP values on the x87 stack; in SSE2+ mode
      // the compiled caller expects them in xmm0, so transfer (or discard)
      // the x87 result here.
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Pop the x87 result through a 4-byte stack temp into xmm0
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Pop the x87 result through an 8-byte stack temp into xmm0
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}
1771 1796
1772 1797
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.code_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Emitted size must match pre_call_FPU_size() so call-site sizing
    // stays consistent (checked in debug builds only).
    debug_only(int off1 = cbuf.code_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.code_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp, rsp);
    // Size must match preserve_SP_size() (debug-only check).
    debug_only(int off1 = cbuf.code_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore the stack pointer saved by preserve_SP above.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp);
  %}
1831 +
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    if ( !_method ) {
      // Not a Java method: plain runtime call relocation
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_inst_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}
1836 1879
  // XOR a register with itself — the canonical "zero a register" idiom
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    //  JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);         // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_inst_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}
1867 1910
  // Shift by an 8-bit immediate: opcode, r/m (secondary selects the
  // shift kind), then the imm8 count
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; loading 0 is
  // strength-reduced to XOR dst,dst
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high
  // register.  NOTE(review): the high half is addressed as lo+2 here
  // rather than via HIGH_FROM_LOW — presumably matching the register
  // file layout in this file; confirm against the register definitions.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1917 1960
1918 1961
  // Load a double constant onto the x87 stack; +0.0 and +1.0 use the
  // dedicated FLDZ/FLD1 opcodes, everything else loads from memory
  enc_class LdImmD (immD src) %{    // Load Immediate
    if( is_positive_zero_double($src$$constant)) {
      // FLDZ
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_double($src$$constant)) {
      // FLD1
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      emit_opcode(cbuf,0xDD);
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_double_constant(cbuf, $src$$constant);
    }
  %}


  // Load a float constant onto the x87 stack; same FLDZ/FLD1 shortcuts
  enc_class LdImmF (immF src) %{    // Load Immediate
    if( is_positive_zero_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      $$$emit8$primary;
      // Load immediate does not have a zero or sign extended version
      // for 8-bit immediates
      // First load to TOS, then move to dst
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_float_constant(cbuf, $src$$constant);
    }
  %}

  // Load a float constant into an XMM register ([disp32] operand form)
  enc_class LdImmX (regX dst, immXF con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_float_constant(cbuf, $con$$constant);
  %}

  // Load a double constant into an XMM register ([disp32] operand form)
  enc_class LdImmXD (regXD dst, immXD con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  // Opcode with a float-constant memory operand ([disp32] form);
  // inst mark is set for relocation of the constant address
  enc_class Opc_MemImm_F(immF src) %{
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x0, $secondary, 0x5);
    emit_float_constant(cbuf, $src$$constant);
  %}
1978 2021
1979 2022
  // MOVD: 32-bit GPR -> low dword of an XMM register
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOVD: low dword of an XMM register -> 32-bit GPR
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Assemble a long register pair into one XMM register:
  // MOVD the lo and hi halves, then interleave with PUNPCKLDQ
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
    }
  %}

  // Split an XMM register into a long register pair: MOVD the low dword,
  // swap dwords with PSHUFLW into tmp, MOVD the (former) high dword
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
2036 2079
2037 2080
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low half of a long pair into an int register
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long-pair variants: _Lo/_Hi emit the primary/secondary opcode plus
  // the low/high registers; _Lo2/_Hi2 emit only the mod/rm byte
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // mod/rm pairing an int register with the high half of a long pair
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2077 2120
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  // Emit a float constant as its raw 32-bit pattern
  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute [disp32] memory reference with a zero displacement
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix (0xF0), only needed on multiprocessor machines
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2116 2159
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-exchange of a 32-bit value at [mem_ptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Turn a preceding compare's ZF into a 0/1 value: res = 0, then a
  // short JNE skips the 5-byte "MOV res,1" when ZF is clear
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}
2164 2207
  enc_class set_instruction_start( ) %{
    cbuf.set_inst_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Generic reg,mem form: mod/rm (+sib/disp) for an int register and a
  // memory operand; the displacement may carry an oop needing relocation
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Same as RegMem but addresses the high 32-bit half of a long in
  // memory (disp+4); an oop displacement cannot be offset, hence the assert
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2188 2231
  // Shift a long by 1..31: a double shift (SHLD/SHRD, selected by
  // $tertiary) moves bits between the halves, then the primary opcode
  // shifts the remaining half by the same count
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift
  // lo by (cnt-32), then replicate the sign bit into hi by shifting 31
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift it
  // by (cnt-32) if non-zero, and clear the vacated half with XOR
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2228 2271
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant; // 0 = low half, 4 = high half
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // Same as RMopc_Mem_no_oop except the displacement is allowed to be an oop
  // (requires relocation info when addressing static globals).
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // LEA dst,[src0+src1]: reuses the reg/mem encoder with no index and no scale.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2276 2319
  // dst = min(dst,src): compare, then conditionally skip over a 2-byte MOV.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL rel8, skipping the 2-byte MOV below)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): same shape as min_enc, with JG instead of JL.
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG rel8, skipping the 2-byte MOV below)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2300 2343
  // Store FPU register 'src' to memory.
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  enc_class enc_FP_store(memory mem, regD src) %{
    int reg_encoding = 0x2; // Just store (FST)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop (FSTP)
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Two's-complement negate of an integer register.
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set dst's low byte to 1 if the last compare was signed less-than.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2332 2375
  // Branch-free p = (p < q) ? p-q+y : p-q.  SBB materializes the borrow as
  // an all-ones/all-zeros mask in tmp, which then gates the add of y.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = 0 or -1 depending on the borrow)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Same as enc_cmpLTP but the masked addend comes from memory instead of a
  // register.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = 0 or -1 depending on the borrow)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,[$mem]  (memory form of the $y operand above)
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2373 2416
  // Variable shift left of a long by CL.  The hardware ignores bit 5 of the
  // count, so counts >= 32 are handled explicitly: move lo into hi, clear lo,
  // then let SHLD/SHL apply the remaining (count mod 32) bits.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4-byte MOV+CLR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical shift right of a long by CL; mirror image of
  // shift_left_long (hi moves into lo, hi is cleared, then SHRD/SHR).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4-byte MOV+CLR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic shift right of a long by CL; like shift_right_long
  // but the hi half is sign-filled (SAR hi,31) instead of cleared.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 5-byte MOV+SAR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2446 2489
2447 2490
  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes

  // FPU arithmetic with a register operand (FMUL, FDIV, ...): the opcode
  // comes from $primary, the /r extension from $secondary.
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );  // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack.
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply TOS by an 80-bit bias constant to force strictfp rounding
  // behavior (first of a bias/unbias pair).
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Second half of the strictfp bias pair: multiply by the inverse bias.
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2487 2530
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}

  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Store TOS into FPU register dst and pop the FPU stack.
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack.
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2536 2579
  // Push FPU's float to a stack-slot, and pop FPU-stack
  // If src is already FPR1 a plain FST suffices; otherwise FLD it first and
  // use the popping store (FSTP) to rebalance the FPU stack.
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;              // /2 = FST (no pop)
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                // /3 = FSTP (store & pop)
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  // Same shape as Pop_Mem_Reg_F with the 64-bit store opcode.
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD (0xD0+i = FST ST(i))
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;       // 0xD8+i = FSTP ST(i)
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
  %}
2570 2613
2571 2614
  // dst = src + src1*src2, computed on the FPU stack via MacroAssembler.
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0);   // value at TOS
    masm.fadd( $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);  // value at TOS, popped off after store
  %}

  // Load dst to TOS for an FPREM-style op, arranging for src to sit in FPR1.
  // The fincstp/fxch/fdecstp dance swaps src into FPR1 without disturbing
  // the rest of the FPU stack.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2598 2641
  // Move two XMM doubles onto the FPU stack (src1 then src0) by bouncing each
  // through an 8-byte scratch area below ESP.  Used for FPU-only ops (e.g.
  // DREM) whose operands live in XMM registers.
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate 8 bytes of scratch (a qword, not a single word)
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Single-precision variant of Push_ModD_encoding: 4-byte scratch, MOVSS/FLD.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate a word of scratch
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2646 2689
  // Move the FPU TOS double into XMM register dst via the scratch qword at
  // [ESP], then release the scratch space.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Single-precision variant: FSTP_S to [ESP], MOVSS into dst, then release
  // d8 bytes of scratch (4 or 8, supplied by the instruction).
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Push a single XMM double onto the FPU stack via an 8-byte scratch area.
  enc_class Push_SrcXD(regXD src) %{
    // Allocate 8 bytes of scratch
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Reserve an 8-byte scratch area below ESP.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}

  // Release the 8-byte scratch area reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}

  // Copy xmm_src into FPR1 via the scratch qword at [ESP] (MOVSD + FLD_D).
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2710 2753
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  // Splits Q = Y*lg2(X) into integer and fractional parts; F2XM1 handles the
  // fraction, and 2^int(Q) is built by hand in the double's exponent field.
  // Requires an 8-byte scratch qword at [ESP]; clobbers EAX, EBX, ECX.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov eax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov ecx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add eax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov ebx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl eax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test ebx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne eax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul st(0),[esp+0] (m64real); FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2749 2792
  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

  // Rotate the FPREM result out of FPR1 when src was not FPR1, using the
  // fincstp/fxch/fdecstp swap; the actual store is done by a following
  // Pop_Reg_F/Pop_Mem_F encoding.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to EFLAGS and branch past 5 bytes when the parity flag
  // (unordered-compare indicator) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip  (JNP rel8 over the following 5 bytes)
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2781 2824
  // FPREM loop for remainder: FPREM only does partial reduction, so repeat
  // until the FPU status C2 (incomplete) flag — surfaced as the parity flag
  // after SAHF — is clear.
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (JP rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Materialize FPU compare status into EFLAGS, folding the unordered (NaN)
  // case into "less than" by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2826 2869
  // After a P6 FCOMI-style compare: fold a NaN result (parity set) into the
  // "less than" outcome so integer branches behave sanely.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2841 2884
  // Pseudocode for CmpF_Result below:
  //   fnstsw_ax();
  //   sahf();
  //   movl(dst, nan_result);
  //   jcc(Assembler::parity, exit);
  //   movl(dst, less_result);
  //   jcc(Assembler::below, exit);
  //   movl(dst, equal_result);
  //   jcc(Assembler::equal, exit);
  //   movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  // Produce the three-way compare result (-1/0/1) of an FPU compare in an
  // integer register; NaN compares yield -1.  The jcc displacements (0x13,
  // 0x0C, 0x05) each skip the remaining MOV+Jcc pairs to a common exit.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2885 2928
2886 2929
  // XMM version of CmpF_Result. Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.  Expects dst pre-loaded with 0; bumps it to
  // +1/-1 for greater/less (NaN falls into the less case).
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);    // unordered (NaN) -> treat as less
    __ jccb(Assembler::equal,  done);   // equal -> dst stays 0
    __ jccb(Assembler::above,  inc);    // greater -> +1
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}
2904 2947
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // NOTE(review): presumably broken because when the hi halves differ, the
  // signed condition codes of a single 32-bit CMP do not encode the ordering
  // of the full 64-bit values — TODO confirm before re-enabling.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy into both
  // halves, then arithmetic-shift the hi half by 31 to spread the sign.
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2932 2975
  // Convert a long register pair to double: push both halves, FILD the
  // 64-bit value from [ESP], then pop the two words back off the stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Multiply int src1 into EDX:EAX, then shift the high word right by
  // (cnt-32) to extract the upper bits of the product into dst (EDX).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // (the caller is expected to release the two pushed words itself)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2974 3017
  // Signed 32x32->64 multiply: one-operand IMUL leaves the product in EDX:EAX.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Unsigned 32x32->64 multiply: one-operand MUL leaves the product in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2988 3031
  // Full 64x64->64 multiply on a 32-bit machine, result in EDX:EAX.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (comment historically said ESI; tmp is whatever the
    //                   allocator picked)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (8-bit) constant, result in EDX:EAX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp  (comment historically said ESI; tmp is allocator-chosen)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3032 3075
  // 64-bit signed division via a C runtime call: push both longs as
  // hi/lo 32-bit halves, call SharedRuntime::ldiv, then pop the four
  // words.  NOTE(review): the result register convention is defined by
  // the runtime call ABI -- presumably EDX:EAX; confirm against
  // SharedRuntime::ldiv.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi   (0x50+reg is the one-byte PUSH r32 encoding)
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime (mark needed for relocation)
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // four 32-bit words were pushed above
  %}
3051 3094
  // 64-bit signed remainder via a C runtime call; identical argument
  // marshalling to long_div but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime (mark needed for relocation)
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // four 32-bit words were pushed above
  %}
3070 3113
  // Compare a long against zero for equality: OR the two halves into
  // tmp so ZF=1 iff src == 0.  Only the Z flag result is meaningful.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV  $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR   $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3079 3122
  // Equality compare of two longs: compare the low words, and only if
  // they match fall through to compare the high words.  The short JNE
  // skips the 2-byte second CMP, leaving ZF=0 from the first compare.
  // Only EQ/NE are valid from the resulting flags.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP  $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP  $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3091 3134
  // Signed ordered compare of two longs via the classic CMP/SBB idiom:
  // the SBB of the high words consumes the borrow from the low-word CMP,
  // producing flags as if a full 64-bit subtract had been performed.
  // tmp is clobbered to hold the high-word difference.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP  $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV  $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB  $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3103 3146
  // Signed compare of a long against zero: compute flags for (0 - src)
  // with the CMP/SBB borrow-chain idiom, using tmp zeroed as the left
  // operand.  tmp is clobbered.
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR  $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP  $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB  $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3115 3158
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair in three ops:
  //   NEG hi ; NEG lo ; SBB hi,0
  // The trailing SBB borrows 1 from the high word exactly when the low
  // word was non-zero, yielding the correct 64-bit -dst.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3126 3169
  // Load 64 bits from memory into the low half of an XMM register (MOVQ).
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3131 3174
  // Store the low 64 bits of an XMM register to memory (MOVQ).
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3136 3179
  // Replicate the low byte of src across the low 8 bytes of dst:
  // copy src->dst, PUNPCKLBW dst,dst doubles each low byte in place
  // (b0 b0 b1 b1 ...), then PSHUFLW with imm 0x00 broadcasts word 0
  // (= b0 b0) across all four low words.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3144 3187
  // Broadcast the low 16-bit word of src across the four low words of
  // dst (PSHUFLW with shuffle-control 0x00 selects word 0 four times).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3150 3193
  // Generic 32-bit dword shuffle: PSHUFD dst, src, mode where mode is
  // the immediate shuffle-control byte supplied by the instruct rule.
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3156 3199
  // Bitwise XOR of two XMM registers (PXOR); dst ^= src.
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3162 3205
  // Move a 32-bit GPR into the low dword of an XMM register (MOVD);
  // the upper 96 bits of dst are zeroed by the instruction.
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3168 3211
3169 3212
3170 3213 // Because the transitions from emitted code to the runtime
3171 3214 // monitorenter/exit helper stubs are so slow it's critical that
3172 3215 // we inline both the stack-locking fast-path and the inflated fast path.
3173 3216 //
3174 3217 // See also: cmpFastLock and cmpFastUnlock.
3175 3218 //
3176 3219 // What follows is a specialized inline transliteration of the code
3177 3220 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3178 3221 // another option would be to emit TrySlowEnter and TrySlowExit methods
3179 3222 // at startup-time. These methods would accept arguments as
// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3181 3224 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3182 3225 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3183 3226 // In practice, however, the # of lock sites is bounded and is usually small.
3184 3227 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
3188 3231 //
3189 3232 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3190 3233 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3191 3234 // to those specialized methods. That'd give us a mostly platform-independent
3192 3235 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
3194 3237 // to park() or unpark() threads. We'd also need a few more unsafe operators
3195 3238 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3196 3239 // (b) explicit barriers or fence operations.
3197 3240 //
3198 3241 // TODO:
3199 3242 //
3200 3243 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3201 3244 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3202 3245 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3203 3246 // the lock operators would typically be faster than reifying Self.
3204 3247 //
3205 3248 // * Ideally I'd define the primitives as:
3206 3249 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3207 3250 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3208 3251 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3209 3252 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3210 3253 // Furthermore the register assignments are overconstrained, possibly resulting in
3211 3254 // sub-optimal code near the synchronization site.
3212 3255 //
3213 3256 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3214 3257 // Alternately, use a better sp-proximity test.
3215 3258 //
3216 3259 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3217 3260 // Either one is sufficient to uniquely identify a thread.
3218 3261 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3219 3262 //
3220 3263 // * Intrinsify notify() and notifyAll() for the common cases where the
3221 3264 // object is locked by the calling thread but the waitlist is empty.
3222 3265 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3223 3266 //
3224 3267 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3225 3268 // But beware of excessive branch density on AMD Opterons.
3226 3269 //
3227 3270 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3228 3271 // or failure of the fast-path. If the fast-path fails then we pass
3229 3272 // control to the slow-path, typically in C. In Fast_Lock and
3230 3273 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3231 3274 // will emit a conditional branch immediately after the node.
3232 3275 // So we have branches to branches and lots of ICC.ZF games.
3233 3276 // Instead, it might be better to have C2 pass a "FailureLabel"
3234 3277 // into Fast_Lock and Fast_Unlock. In the case of success, control
3235 3278 // will drop through the node. ICC.ZF is undefined at exit.
3236 3279 // In the case of failure, the node will branch directly to the
3237 3280 // FailureLabel
3238 3281
3239 3282
3240 3283 // obj: object to lock
3241 3284 // box: on-stack box address (displaced header location) - KILLED
3242 3285 // rax,: tmp -- KILLED
3243 3286 // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
    // Inline monitor-enter fast path.  On exit ICC.ZF == 1 means the lock
    // was acquired; ZF == 0 forces control through the slow path (see the
    // protocol comment at DONE_LABEL).  EmitSync is a diagnostic flag
    // whose bits select alternative emission strategies.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                     // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);         // Anticipate successful CAS
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));     // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking: success iff the markword (now in tmpReg) is an
      // address within our own stack frame.
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).  We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;           // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;               // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;           // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                     // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                  // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;           // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;               // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3474 3517
3475 3518 // obj: object to unlock
3476 3519 // box: box address (displaced header location), killed. Must be EAX.
3477 3520 // rbx,: killed tmp; cannot be obj nor box.
3478 3521 //
3479 3522 // Some commentary on balanced locking:
3480 3523 //
3481 3524 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3482 3525 // Methods that don't have provably balanced locking are forced to run in the
3483 3526 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3484 3527 // The interpreter provides two properties:
3485 3528 // I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
3487 3530 // interpreter maintains an on-stack list of locks currently held by
3488 3531 // a frame.
// I2:  If a method attempts to unlock an object that is not held by
//      the frame, the interpreter throws IMSX.
3491 3534 //
3492 3535 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3493 3536 // B() doesn't have provably balanced locking so it runs in the interpreter.
3494 3537 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3495 3538 // is still locked by A().
3496 3539 //
3497 3540 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3498 3541 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3499 3542 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3500 3543 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3501 3544
3502 3545 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3503 3546
3504 3547 Register objReg = as_Register($obj$$reg);
3505 3548 Register boxReg = as_Register($box$$reg);
3506 3549 Register tmpReg = as_Register($tmp$$reg);
3507 3550
3508 3551 guarantee (objReg != boxReg, "") ;
3509 3552 guarantee (objReg != tmpReg, "") ;
3510 3553 guarantee (boxReg != tmpReg, "") ;
3511 3554 guarantee (boxReg == as_Register(EAX_enc), "") ;
3512 3555 MacroAssembler masm(&cbuf);
3513 3556
3514 3557 if (EmitSync & 4) {
3515 3558 // Disable - inhibit all inlining. Force control through the slow-path
3516 3559 masm.cmpptr (rsp, 0) ;
3517 3560 } else
3518 3561 if (EmitSync & 8) {
3519 3562 Label DONE_LABEL ;
3520 3563 if (UseBiasedLocking) {
3521 3564 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3522 3565 }
3523 3566 // classic stack-locking code ...
3524 3567 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3525 3568 masm.testptr(tmpReg, tmpReg) ;
3526 3569 masm.jcc (Assembler::zero, DONE_LABEL) ;
3527 3570 if (os::is_MP()) { masm.lock(); }
3528 3571 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3529 3572 masm.bind(DONE_LABEL);
3530 3573 } else {
3531 3574 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3532 3575
3533 3576 // Critically, the biased locking test must have precedence over
3534 3577 // and appear before the (box->dhw == 0) recursive stack-lock test.
3535 3578 if (UseBiasedLocking && !UseOptoBiasInlining) {
3536 3579 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3537 3580 }
3538 3581
3539 3582 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3540 3583 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3541 3584 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3542 3585
3543 3586 masm.testptr(tmpReg, 0x02) ; // Inflated?
3544 3587 masm.jccb (Assembler::zero, Stacked) ;
3545 3588
3546 3589 masm.bind (Inflated) ;
3547 3590 // It's inflated.
3548 3591 // Despite our balanced locking property we still check that m->_owner == Self
3549 3592 // as java routines or native JNI code called by this thread might
3550 3593 // have released the lock.
3551 3594 // Refer to the comments in synchronizer.cpp for how we might encode extra
3552 3595 // state in _succ so we can avoid fetching EntryList|cxq.
3553 3596 //
3554 3597 // I'd like to add more cases in fast_lock() and fast_unlock() --
3555 3598 // such as recursive enter and exit -- but we have to be wary of
3556 3599 // I$ bloat, T$ effects and BP$ effects.
3557 3600 //
3558 3601 // If there's no contention try a 1-0 exit. That is, exit without
3559 3602 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3560 3603 // we detect and recover from the race that the 1-0 exit admits.
3561 3604 //
3562 3605 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3563 3606 // before it STs null into _owner, releasing the lock. Updates
3564 3607 // to data protected by the critical section must be visible before
3565 3608 // we drop the lock (and thus before any other thread could acquire
3566 3609 // the lock and observe the fields protected by the lock).
3567 3610 // IA32's memory-model is SPO, so STs are ordered with respect to
3568 3611 // each other and there's no need for an explicit barrier (fence).
3569 3612 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3570 3613
3571 3614 masm.get_thread (boxReg) ;
3572 3615 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3573 3616 // prefetchw [ebx + Offset(_owner)-2]
3574 3617 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3575 3618 }
3576 3619
3577 3620 // Note that we could employ various encoding schemes to reduce
3578 3621 // the number of loads below (currently 4) to just 2 or 3.
3579 3622 // Refer to the comments in synchronizer.cpp.
3580 3623 // In practice the chain of fetches doesn't seem to impact performance, however.
3581 3624 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3582 3625 // Attempt to reduce branch density - AMD's branch predictor.
3583 3626 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3584 3627 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3585 3628 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3586 3629 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3587 3630 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3588 3631 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3589 3632 masm.jmpb (DONE_LABEL) ;
3590 3633 } else {
3591 3634 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3592 3635 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3593 3636 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3594 3637 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3595 3638 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3596 3639 masm.jccb (Assembler::notZero, CheckSucc) ;
3597 3640 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3598 3641 masm.jmpb (DONE_LABEL) ;
3599 3642 }
3600 3643
3601 3644 // The Following code fragment (EmitSync & 65536) improves the performance of
3602 3645 // contended applications and contended synchronization microbenchmarks.
3603 3646 // Unfortunately the emission of the code - even though not executed - causes regressions
3604 3647 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3605 3648 // with an equal number of never-executed NOPs results in the same regression.
3606 3649 // We leave it off by default.
3607 3650
3608 3651 if ((EmitSync & 65536) != 0) {
3609 3652 Label LSuccess, LGoSlowPath ;
3610 3653
3611 3654 masm.bind (CheckSucc) ;
3612 3655
3613 3656 // Optional pre-test ... it's safe to elide this
3614 3657 if ((EmitSync & 16) == 0) {
3615 3658 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3616 3659 masm.jccb (Assembler::zero, LGoSlowPath) ;
3617 3660 }
3618 3661
3619 3662 // We have a classic Dekker-style idiom:
3620 3663 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3621 3664 // There are a number of ways to implement the barrier:
3622 3665 // (1) lock:andl &m->_owner, 0
3623 3666 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3624 3667 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3625 3668 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3626 3669 // (2) If supported, an explicit MFENCE is appealing.
3627 3670 // In older IA32 processors MFENCE is slower than lock:add or xchg
3628 3671 // particularly if the write-buffer is full as might be the case if
3629 3672 // if stores closely precede the fence or fence-equivalent instruction.
3630 3673 // In more modern implementations MFENCE appears faster, however.
3631 3674 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3632 3675 // The $lines underlying the top-of-stack should be in M-state.
3633 3676 // The locked add instruction is serializing, of course.
3634 3677 // (4) Use xchg, which is serializing
3635 3678 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3636 3679 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3637 3680 // The integer condition codes will tell us if succ was 0.
3638 3681 // Since _succ and _owner should reside in the same $line and
3639 3682 // we just stored into _owner, it's likely that the $line
3640 3683 // remains in M-state for the lock:orl.
3641 3684 //
3642 3685 // We currently use (3), although it's likely that switching to (2)
3643 3686 // is correct for the future.
3644 3687
3645 3688 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3646 3689 if (os::is_MP()) {
3647 3690 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3648 3691 masm.mfence();
3649 3692 } else {
3650 3693 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3651 3694 }
3652 3695 }
3653 3696 // Ratify _succ remains non-null
3654 3697 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3655 3698 masm.jccb (Assembler::notZero, LSuccess) ;
3656 3699
3657 3700 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3658 3701 if (os::is_MP()) { masm.lock(); }
3659 3702 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3660 3703 masm.jccb (Assembler::notEqual, LSuccess) ;
3661 3704 // Since we're low on registers we installed rsp as a placeholding in _owner.
3662 3705 // Now install Self over rsp. This is safe as we're transitioning from
3663 3706     // non-null to non-null
3664 3707 masm.get_thread (boxReg) ;
3665 3708 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3666 3709 // Intentional fall-through into LGoSlowPath ...
3667 3710
3668 3711 masm.bind (LGoSlowPath) ;
3669 3712 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3670 3713 masm.jmpb (DONE_LABEL) ;
3671 3714
3672 3715 masm.bind (LSuccess) ;
3673 3716 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3674 3717 masm.jmpb (DONE_LABEL) ;
3675 3718 }
3676 3719
3677 3720 masm.bind (Stacked) ;
3678 3721 // It's not inflated and it's not recursively stack-locked and it's not biased.
3679 3722 // It must be stack-locked.
3680 3723 // Try to reset the header to displaced header.
3681 3724 // The "box" value on the stack is stable, so we can reload
3682 3725 // and be assured we observe the same value as above.
3683 3726 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3684 3727 if (os::is_MP()) { masm.lock(); }
3685 3728 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3686 3729     // Intentional fall-thru into DONE_LABEL
3687 3730
3688 3731
3689 3732 // DONE_LABEL is a hot target - we'd really like to place it at the
3690 3733 // start of cache line by padding with NOPs.
3691 3734 // See the AMD and Intel software optimization manuals for the
3692 3735 // most efficient "long" NOP encodings.
3693 3736 // Unfortunately none of our alignment mechanisms suffice.
3694 3737 if ((EmitSync & 65536) == 0) {
3695 3738 masm.bind (CheckSucc) ;
3696 3739 }
3697 3740 masm.bind(DONE_LABEL);
3698 3741
3699 3742 // Avoid branch to branch on AMD processors
3700 3743 if (EmitSync & 32768) { masm.nop() ; }
3701 3744 }
3702 3745 %}
3703 3746
3704 3747
            // Emit a single-byte POP EDX (opcode 0x5A), used to discard the
            // high word of a long value from the CPU stack.
3705 3748   enc_class enc_pop_rdx() %{
3706 3749     emit_opcode(cbuf,0x5A);        // POP EDX
3707 3750   %}
3708 3751
            // Emit a relocated 5-byte relative JMP to the rethrow stub.
            // The displacement is the stub address minus the end of the
            // 4-byte immediate (code_end + 4 relative addressing).
3709 3752   enc_class enc_rethrow() %{
3710 3753     cbuf.set_inst_mark();          // mark start so the reloc covers this instruction
3711 3754     emit_opcode(cbuf, 0xE9);        // jmp    entry
3712 3755     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4,
3713 3756                    runtime_call_Relocation::spec(), RELOC_IMM32 );
3714 3757   %}
3715 3758
3716 3759
3717 3760   // Convert a double to an int.  Java semantics require we do complex
3718 3761   // manglelations in the corner cases.  So we set the rounding mode to
3719 3762   // 'zero', store the darned double down as an int, and reset the
3720 3763   // rounding mode to 'nearest'.  The hardware throws an exception which
3721 3764   // patches up the correct value directly to the stack.
3722 3765   enc_class D2I_encoding( regD src ) %{
3723 3766     // Flip to round-to-zero mode.  We attempted to allow invalid-op
3724 3767     // exceptions here, so that a NAN or other corner-case value will
3725 3768     // thrown an exception (but normal values get converted at full speed).
3726 3769     // However, I2C adapters and other float-stack manglers leave pending
3727 3770     // invalid-op exceptions hanging.  We would have to clear them before
3728 3771     // enabling them and that is more expensive than just testing for the
3729 3772     // invalid value Intel stores down in the corner cases.
3730 3773     emit_opcode(cbuf,0xD9);         // FLDCW  trunc
3731 3774     emit_opcode(cbuf,0x2D);
3732 3775     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3733 3776     // Allocate a word
3734 3777     emit_opcode(cbuf,0x83);         // SUB ESP,4
3735 3778     emit_opcode(cbuf,0xEC);
3736 3779     emit_d8(cbuf,0x04);
3737 3780     // Encoding assumes a double has been pushed into FPR0.
3738 3781     // Store down the double as an int, popping the FPU stack
3739 3782     emit_opcode(cbuf,0xDB);         // FISTP [ESP]
3740 3783     emit_opcode(cbuf,0x1C);
3741 3784     emit_d8(cbuf,0x24);
3742 3785     // Restore the rounding mode; mask the exception
3743 3786     emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
3744 3787     emit_opcode(cbuf,0x2D);
3745 3788     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3746 3789         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3747 3790         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3748 3791 
              // 0x80000000 is the x87 "integer indefinite" value stored on
              // overflow/NaN; only in that case do we fall into the slow call.
3749 3792     // Load the converted int; adjust CPU stack
3750 3793     emit_opcode(cbuf,0x58);       // POP EAX
3751 3794     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3752 3795     emit_d32   (cbuf,0x80000000); //         0x80000000
3753 3796     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3754 3797     emit_d8    (cbuf,0x07);       // Size of slow_call (7 bytes: 2-byte FLD + 5-byte CALL)
3755 3798     // Push src onto stack slow-path
3756 3799     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3757 3800     emit_d8    (cbuf,0xC0-1+$src$$reg );
3758 3801     // CALL directly to the runtime
3759 3802     cbuf.set_inst_mark();
3760 3803     emit_opcode(cbuf,0xE8);       // Call into runtime
3761 3804     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3762 3805     // Carry on here...
3763 3806   %}
3764 3807
            // Convert a double (in FPR0) to a long, same trunc-mode dance as
            // D2I_encoding above but with a 64-bit FISTP and a two-word result
            // popped into EDX:EAX.
3765 3808   enc_class D2L_encoding( regD src ) %{
3766 3809     emit_opcode(cbuf,0xD9);         // FLDCW  trunc
3767 3810     emit_opcode(cbuf,0x2D);
3768 3811     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3769 3812     // Allocate a word
3770 3813     emit_opcode(cbuf,0x83);         // SUB ESP,8
3771 3814     emit_opcode(cbuf,0xEC);
3772 3815     emit_d8(cbuf,0x08);
3773 3816     // Encoding assumes a double has been pushed into FPR0.
3774 3817     // Store down the double as a long, popping the FPU stack
3775 3818     emit_opcode(cbuf,0xDF);         // FISTP [ESP]
3776 3819     emit_opcode(cbuf,0x3C);
3777 3820     emit_d8(cbuf,0x24);
3778 3821     // Restore the rounding mode; mask the exception
3779 3822     emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
3780 3823     emit_opcode(cbuf,0x2D);
3781 3824     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3782 3825         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3783 3826         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3784 3827 
              // Slow path is taken only for the long "integer indefinite"
              // value 0x8000000000000000 (EDX == 0x80000000 and EAX == 0),
              // which FISTP stores on overflow/NaN.
3785 3828     // Load the converted int; adjust CPU stack
3786 3829     emit_opcode(cbuf,0x58);       // POP EAX
3787 3830     emit_opcode(cbuf,0x5A);       // POP EDX
3788 3831     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3789 3832     emit_d8    (cbuf,0xFA);       // rdx
3790 3833     emit_d32   (cbuf,0x80000000); //         0x80000000
3791 3834     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3792 3835     emit_d8    (cbuf,0x07+4);     // Size of slow_call (includes following TEST/JNE)
3793 3836     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3794 3837     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3795 3838     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3796 3839     emit_d8    (cbuf,0x07);       // Size of slow_call
3797 3840     // Push src onto stack slow-path
3798 3841     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3799 3842     emit_d8    (cbuf,0xC0-1+$src$$reg );
3800 3843     // CALL directly to the runtime
3801 3844     cbuf.set_inst_mark();
3802 3845     emit_opcode(cbuf,0xE8);       // Call into runtime
3803 3846     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3804 3847     // Carry on here...
3805 3848   %}
3806 3849
            // Convert an XMM single (regX) to a long.  The XMM value is
            // spilled to the stack, reloaded into the x87 FPU, and converted
            // with a truncating 64-bit FISTP (same pattern as D2L_encoding);
            // overflow/NaN sentinel 0x8000000000000000 diverts to the
            // d2l_wrapper runtime slow path with the float reloaded in FPR0.
3807 3850   enc_class X2L_encoding( regX src ) %{
3808 3851     // Allocate a word
3809 3852     emit_opcode(cbuf,0x83);      // SUB ESP,8
3810 3853     emit_opcode(cbuf,0xEC);
3811 3854     emit_d8(cbuf,0x08);
3812 3855 
3813 3856     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3814 3857     emit_opcode  (cbuf, 0x0F );
3815 3858     emit_opcode  (cbuf, 0x11 );
3816 3859     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3817 3860 
3818 3861     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3819 3862     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3820 3863 
3821 3864     emit_opcode(cbuf,0xD9);      // FLDCW trunc
3822 3865     emit_opcode(cbuf,0x2D);
3823 3866     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3824 3867 
3825 3868     // Encoding assumes a double has been pushed into FPR0.
3826 3869     // Store down the double as a long, popping the FPU stack
3827 3870     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
3828 3871     emit_opcode(cbuf,0x3C);
3829 3872     emit_d8(cbuf,0x24);
3830 3873 
3831 3874     // Restore the rounding mode; mask the exception
3832 3875     emit_opcode(cbuf,0xD9);      // FLDCW std/24-bit mode
3833 3876     emit_opcode(cbuf,0x2D);
3834 3877     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3835 3878         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3836 3879         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3837 3880 
3838 3881     // Load the converted int; adjust CPU stack
3839 3882     emit_opcode(cbuf,0x58);      // POP EAX
3840 3883 
3841 3884     emit_opcode(cbuf,0x5A);      // POP EDX
3842 3885 
              // Sentinel check: slow path only when EDX:EAX == 0x80000000:00000000
3843 3886     emit_opcode(cbuf,0x81);      // CMP EDX,imm
3844 3887     emit_d8    (cbuf,0xFA);      // rdx
3845 3888     emit_d32   (cbuf,0x80000000);// 0x80000000
3846 3889 
3847 3890     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3848 3891     emit_d8    (cbuf,0x13+4);    // Size of slow_call (0x13-byte reload sequence + TEST/JNE)
3849 3892 
3850 3893     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
3851 3894     emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
3852 3895 
3853 3896     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3854 3897     emit_d8    (cbuf,0x13);      // Size of slow_call
3855 3898 
              // Slow path: reload the float into FPR0 and call the runtime.
3856 3899     // Allocate a word
3857 3900     emit_opcode(cbuf,0x83);      // SUB ESP,4
3858 3901     emit_opcode(cbuf,0xEC);
3859 3902     emit_d8(cbuf,0x04);
3860 3903 
3861 3904     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3862 3905     emit_opcode  (cbuf, 0x0F );
3863 3906     emit_opcode  (cbuf, 0x11 );
3864 3907     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3865 3908 
3866 3909     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3867 3910     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3868 3911 
3869 3912     emit_opcode(cbuf,0x83);      // ADD ESP,4
3870 3913     emit_opcode(cbuf,0xC4);
3871 3914     emit_d8(cbuf,0x04);
3872 3915 
3873 3916     // CALL directly to the runtime
3874 3917     cbuf.set_inst_mark();
3875 3918     emit_opcode(cbuf,0xE8);       // Call into runtime
3876 3919     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3877 3920     // Carry on here...
3878 3921   %}
3879 3922
            // Convert an XMM double (regXD) to a long.  Mirrors X2L_encoding
            // but uses MOVSD/FLD_D and an 8-byte stack temp; slow path for the
            // 0x8000000000000000 sentinel reloads the double and calls the
            // d2l_wrapper runtime stub.
3880 3923   enc_class XD2L_encoding( regXD src ) %{
3881 3924     // Allocate a word
3882 3925     emit_opcode(cbuf,0x83);      // SUB ESP,8
3883 3926     emit_opcode(cbuf,0xEC);
3884 3927     emit_d8(cbuf,0x08);
3885 3928 
3886 3929     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
3887 3930     emit_opcode  (cbuf, 0x0F );
3888 3931     emit_opcode  (cbuf, 0x11 );
3889 3932     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3890 3933 
3891 3934     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
3892 3935     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3893 3936 
3894 3937     emit_opcode(cbuf,0xD9);      // FLDCW trunc
3895 3938     emit_opcode(cbuf,0x2D);
3896 3939     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3897 3940 
3898 3941     // Encoding assumes a double has been pushed into FPR0.
3899 3942     // Store down the double as a long, popping the FPU stack
3900 3943     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
3901 3944     emit_opcode(cbuf,0x3C);
3902 3945     emit_d8(cbuf,0x24);
3903 3946 
3904 3947     // Restore the rounding mode; mask the exception
3905 3948     emit_opcode(cbuf,0xD9);      // FLDCW std/24-bit mode
3906 3949     emit_opcode(cbuf,0x2D);
3907 3950     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3908 3951         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3909 3952         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3910 3953 
3911 3954     // Load the converted int; adjust CPU stack
3912 3955     emit_opcode(cbuf,0x58);      // POP EAX
3913 3956 
3914 3957     emit_opcode(cbuf,0x5A);      // POP EDX
3915 3958 
              // Sentinel check: slow path only when EDX:EAX == 0x80000000:00000000
3916 3959     emit_opcode(cbuf,0x81);      // CMP EDX,imm
3917 3960     emit_d8    (cbuf,0xFA);      // rdx
3918 3961     emit_d32   (cbuf,0x80000000); // 0x80000000
3919 3962 
3920 3963     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3921 3964     emit_d8    (cbuf,0x13+4);    // Size of slow_call (reload sequence + TEST/JNE)
3922 3965 
3923 3966     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
3924 3967     emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
3925 3968 
3926 3969     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3927 3970     emit_d8    (cbuf,0x13);      // Size of slow_call
3928 3971 
3929 3972     // Push src onto stack slow-path
3930 3973     // Allocate a word
3931 3974     emit_opcode(cbuf,0x83);      // SUB ESP,8
3932 3975     emit_opcode(cbuf,0xEC);
3933 3976     emit_d8(cbuf,0x08);
3934 3977 
3935 3978     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
3936 3979     emit_opcode  (cbuf, 0x0F );
3937 3980     emit_opcode  (cbuf, 0x11 );
3938 3981     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3982 
3940 3983     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
3941 3984     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3985 
3943 3986     emit_opcode(cbuf,0x83);      // ADD ESP,8
3944 3987     emit_opcode(cbuf,0xC4);
3945 3988     emit_d8(cbuf,0x08);
3946 3989 
3947 3990     // CALL directly to the runtime
3948 3991     cbuf.set_inst_mark();
3949 3992     emit_opcode(cbuf,0xE8);      // Call into runtime
3950 3993     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3951 3994     // Carry on here...
3952 3995   %}
3953 3996
3954 3997 enc_class D2X_encoding( regX dst, regD src ) %{
3955 3998 // Allocate a word
3956 3999 emit_opcode(cbuf,0x83); // SUB ESP,4
3957 4000 emit_opcode(cbuf,0xEC);
3958 4001 emit_d8(cbuf,0x04);
3959 4002 int pop = 0x02;
3960 4003 if ($src$$reg != FPR1L_enc) {
3961 4004 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
3962 4005 emit_d8( cbuf, 0xC0-1+$src$$reg );
3963 4006 pop = 0x03;
3964 4007 }
3965 4008 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
3966 4009
3967 4010 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
3968 4011 emit_opcode (cbuf, 0x0F );
3969 4012 emit_opcode (cbuf, 0x10 );
3970 4013 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
3971 4014
3972 4015 emit_opcode(cbuf,0x83); // ADD ESP,4
3973 4016 emit_opcode(cbuf,0xC4);
3974 4017 emit_d8(cbuf,0x04);
3975 4018 // Carry on here...
3976 4019 %}
3977 4020
            // Finish an XMM float/double-to-int conversion.  The CVTTSS2SI /
            // CVTTSD2SI opcode bytes are assumed to be emitted by the
            // instruction's opcode fields before this runs; $primary selects
            // the double (0xF2/FLD_D, 8-byte temp) vs. float (0xF3/FLD_S,
            // 4-byte temp) variant of the slow path.  The sentinel 0x80000000
            // (produced by CVTT* on overflow/NaN) diverts to d2i_wrapper.
3978 4021   enc_class FX2I_encoding( regX src, eRegI dst ) %{
3979 4022     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); // ModRM for the preceding CVTT* opcode
3980 4023 
3981 4024     // Compare the result to see if we need to go to the slow path
3982 4025     emit_opcode(cbuf,0x81);       // CMP dst,imm
3983 4026     emit_rm    (cbuf,0x3,0x7,$dst$$reg);
3984 4027     emit_d32   (cbuf,0x80000000); //         0x80000000
3985 4028 
3986 4029     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3987 4030     emit_d8    (cbuf,0x13);       // Size of slow_call
3988 4031     // Store xmm to a temp memory
3989 4032     // location and push it onto stack.
3990 4033 
3991 4034     emit_opcode(cbuf,0x83);  // SUB ESP,4 (or 8 when $primary/double)
3992 4035     emit_opcode(cbuf,0xEC);
3993 4036     emit_d8(cbuf, $primary ? 0x8 : 0x4);
3994 4037 
3995 4038     emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
3996 4039     emit_opcode  (cbuf, 0x0F );
3997 4040     emit_opcode  (cbuf, 0x11 );
3998 4041     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3999 4042 
4000 4043     emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4001 4044     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4002 4045 
4003 4046     emit_opcode(cbuf,0x83);  // ADD ESP,4 (or 8 when $primary/double)
4004 4047     emit_opcode(cbuf,0xC4);
4005 4048     emit_d8(cbuf, $primary ? 0x8 : 0x4);
4006 4049 
4007 4050     // CALL directly to the runtime
4008 4051     cbuf.set_inst_mark();
4009 4052     emit_opcode(cbuf,0xE8);       // Call into runtime
4010 4053     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4011 4054 
4012 4055     // Carry on here...
4013 4056   %}
4014 4057
            // Move an XMM single (regX src) onto the x87 FP stack: spill with
            // MOVSS to a 4-byte stack temp, FLD_S it into FPR0, restore ESP.
            // NOTE(review): dst is implicit — the result lands on the FPU
            // stack top; $dst$$reg is not referenced here.
4015 4058   enc_class X2D_encoding( regD dst, regX src ) %{
4016 4059     // Allocate a word
4017 4060     emit_opcode(cbuf,0x83);        // SUB ESP,4
4018 4061     emit_opcode(cbuf,0xEC);
4019 4062     emit_d8(cbuf,0x04);
4020 4063 
4021 4064     emit_opcode  (cbuf, 0xF3 );    // MOVSS [ESP], xmm
4022 4065     emit_opcode  (cbuf, 0x0F );
4023 4066     emit_opcode  (cbuf, 0x11 );
4024 4067     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4025 4068 
4026 4069     emit_opcode(cbuf,0xD9 );       // FLD_S [ESP]
4027 4070     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4028 4071 
4029 4072     emit_opcode(cbuf,0x83);        // ADD ESP,4
4030 4073     emit_opcode(cbuf,0xC4);
4031 4074     emit_d8(cbuf,0x04);
4032 4075 
4033 4076     // Carry on here...
4034 4077   %}
4035 4078
            // XMM absolute value: AND the sign bit away using a mask loaded
            // from a static constant pool via an absolute [disp32] operand
            // (mod=00, r/m=101).
4036 4079   enc_class AbsXF_encoding(regX dst) %{
4037 4080     address signmask_address=(address)float_signmask_pool;
4038 4081     // ANDPS $dst,[signconst]  (0F 54)
4039 4082     emit_opcode(cbuf, 0x0F);
4040 4083     emit_opcode(cbuf, 0x54);
4041 4084     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4042 4085     emit_d32(cbuf, (int)signmask_address);
4043 4086   %}
4044 4087 
4045 4088   enc_class AbsXD_encoding(regXD dst) %{
4046 4089     address signmask_address=(address)double_signmask_pool;
4047 4090     // ANDPD $dst,[signconst]  (66 0F 54)
4048 4091     emit_opcode(cbuf, 0x66);
4049 4092     emit_opcode(cbuf, 0x0F);
4050 4093     emit_opcode(cbuf, 0x54);
4051 4094     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4052 4095     emit_d32(cbuf, (int)signmask_address);
4053 4096   %}
4054 4097 
            // XMM negation: XOR with a sign-flip mask from the constant pool.
4055 4098   enc_class NegXF_encoding(regX dst) %{
4056 4099     address signmask_address=(address)float_signflip_pool;
4057 4100     // XORPS $dst,[signconst]  (0F 57)
4058 4101     emit_opcode(cbuf, 0x0F);
4059 4102     emit_opcode(cbuf, 0x57);
4060 4103     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4061 4104     emit_d32(cbuf, (int)signmask_address);
4062 4105   %}
4063 4106 
4064 4107   enc_class NegXD_encoding(regXD dst) %{
4065 4108     address signmask_address=(address)double_signflip_pool;
4066 4109     // XORPD $dst,[signconst]  (66 0F 57)
4067 4110     emit_opcode(cbuf, 0x66);
4068 4111     emit_opcode(cbuf, 0x0F);
4069 4112     emit_opcode(cbuf, 0x57);
4070 4113     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4071 4114     emit_d32(cbuf, (int)signmask_address);
4072 4115   %}
4073 4116
            // x87 FP-stack arithmetic encodings.  All operate on ST (top of
            // stack) against another stack register; opcodes are the classic
            // two-byte D8/DE forms with the register index folded into the
            // second byte.
4074 4117   enc_class FMul_ST_reg( eRegF src1 ) %{
4075 4118     // Operand was loaded from memory into fp ST (stack top)
4076 4119     // FMUL   ST,$src  /* D8 C8+i */
4077 4120     emit_opcode(cbuf, 0xD8);
4078 4121     emit_opcode(cbuf, 0xC8 + $src1$$reg);
4079 4122   %}
4080 4123 
4081 4124   enc_class FAdd_ST_reg( eRegF src2 ) %{
4082 4125     // FADD   ST,src2  /* D8 C0+i */  (non-popping form)
4083 4126     emit_opcode(cbuf, 0xD8);
4084 4127     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4085 4128     //could use FADDP  src2,fpST  /* DE C0+i */
4086 4129   %}
4087 4130 
4088 4131   enc_class FAddP_reg_ST( eRegF src2 ) %{
4089 4132     // FADDP  src2,ST  /* DE C0+i */  (adds into src2 and pops ST)
4090 4133     emit_opcode(cbuf, 0xDE);
4091 4134     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4092 4135   %}
4093 4136 
            // Fused subtract-then-divide against the stack top.
4094 4137   enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4095 4138     // Operand has been loaded into fp ST (stack top)
4096 4139       // FSUB   ST,$src1
4097 4140       emit_opcode(cbuf, 0xD8);
4098 4141       emit_opcode(cbuf, 0xE0 + $src1$$reg);
4099 4142 
4100 4143       // FDIV
4101 4144       emit_opcode(cbuf, 0xD8);
4102 4145       emit_opcode(cbuf, 0xF0 + $src2$$reg);
4103 4146   %}
4104 4147 
            // Fused add-then-multiply: ST = (ST + src1) * src2.
4105 4148   enc_class MulFAddF (eRegF src1, eRegF src2) %{
4106 4149     // Operand was loaded from memory into fp ST (stack top)
4107 4150     // FADD   ST,$src  /* D8 C0+i */
4108 4151     emit_opcode(cbuf, 0xD8);
4109 4152     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4110 4153 
4111 4154     // FMUL  ST,src2  /* D8 C*+i */
4112 4155     emit_opcode(cbuf, 0xD8);
4113 4156     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4114 4157   %}
4115 4158 
4116 4159 
            // Add-then-multiply-into-src2 (popping): src2 = src2 * (ST + src1).
4117 4160   enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4118 4161     // Operand was loaded from memory into fp ST (stack top)
4119 4162     // FADD   ST,$src  /* D8 C0+i */
4120 4163     emit_opcode(cbuf, 0xD8);
4121 4164     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4122 4165 
4123 4166     // FMULP  src2,ST  /* DE C8+i */
4124 4167     emit_opcode(cbuf, 0xDE);
4125 4168     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4126 4169   %}
4127 4170
4128 4171   // Atomically load the volatile long
            // Uses the x87 FPU: FILD m64 reads the long in a single 64-bit
            // access, then FISTP stores it to the destination stack slot.
4129 4172   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4130 4173     emit_opcode(cbuf,0xDF);                    // FILD m64 (DF /5)
4131 4174     int rm_byte_opcode = 0x05;
4132 4175     int base     = $mem$$base;
4133 4176     int index    = $mem$$index;
4134 4177     int scale    = $mem$$scale;
4135 4178     int displace = $mem$$disp;
4136 4179     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4137 4180     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4138 4181     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // FISTP m64 (DF /7)
4139 4182   %}
4140 4183
            // Atomically load a volatile long via an XMM register:
            // one 64-bit SSE load from $mem into $tmp, then one 64-bit MOVSD
            // store into the destination stack slot.
4141 4184   enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4142 4185     { // Atomic long load
4143 4186       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4144 4187       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4145 4188       emit_opcode(cbuf,0x0F);
4146 4189       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4147 4190       int base     = $mem$$base;
4148 4191       int index    = $mem$$index;
4149 4192       int scale    = $mem$$scale;
4150 4193       int displace = $mem$$disp;
4151 4194       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4152 4195       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4153 4196     }
4154 4197     { // MOVSD $dst,$tmp ! atomic long store
4155 4198       emit_opcode(cbuf,0xF2);
4156 4199       emit_opcode(cbuf,0x0F);
4157 4200       emit_opcode(cbuf,0x11);
4158 4201       int base     = $dst$$base;
4159 4202       int index    = $dst$$index;
4160 4203       int scale    = $dst$$scale;
4161 4204       int displace = $dst$$disp;
4162 4205       bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4163 4206       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4164 4207     }
4165 4208   %}
4166 4209
            // Atomically load a volatile long into a GPR pair: one 64-bit SSE
            // load into $tmp, then split it — MOVD low half to $dst.lo, shift
            // the XMM right 32 bits, MOVD high half to $dst.hi.
4167 4210   enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4168 4211     { // Atomic long load
4169 4212       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4170 4213       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4171 4214       emit_opcode(cbuf,0x0F);
4172 4215       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4173 4216       int base     = $mem$$base;
4174 4217       int index    = $mem$$index;
4175 4218       int scale    = $mem$$scale;
4176 4219       int displace = $mem$$disp;
4177 4220       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4178 4221       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4179 4222     }
4180 4223     { // MOVD $dst.lo,$tmp
4181 4224       emit_opcode(cbuf,0x66);
4182 4225       emit_opcode(cbuf,0x0F);
4183 4226       emit_opcode(cbuf,0x7E);
4184 4227       emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4185 4228     }
4186 4229     { // PSRLQ $tmp,32
4187 4230       emit_opcode(cbuf,0x66);
4188 4231       emit_opcode(cbuf,0x0F);
4189 4232       emit_opcode(cbuf,0x73);
4190 4233       emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4191 4234       emit_d8(cbuf, 0x20);
4192 4235     }
4193 4236     { // MOVD $dst.hi,$tmp
4194 4237       emit_opcode(cbuf,0x66);
4195 4238       emit_opcode(cbuf,0x0F);
4196 4239       emit_opcode(cbuf,0x7E);
4197 4240       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4198 4241     }
4199 4242   %}
4200 4243
4201 4244   // Volatile Store Long.  Must be atomic, so move it into
4202 4245   // the FP TOS and then do a 64-bit FIST.  Has to probe the
4203 4246   // target address before the store (for null-ptr checks)
4204 4247   // so the memory operand is used twice in the encoding.
4205 4248   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4206 4249     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // FILD m64 from the stack slot
4207 4250     cbuf.set_inst_mark();            // Mark start of FIST in case $mem has an oop
4208 4251     emit_opcode(cbuf,0xDF);          // FISTP m64 (DF /7) — the single atomic 64-bit store
4209 4252     int rm_byte_opcode = 0x07;
4210 4253     int base     = $mem$$base;
4211 4254     int index    = $mem$$index;
4212 4255     int scale    = $mem$$scale;
4213 4256     int displace = $mem$$disp;
4214 4257     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4215 4258     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4216 4259   %}
4217 4260
            // Atomically store a volatile long from a stack slot via XMM:
            // 64-bit SSE load from the slot into $tmp, then one 64-bit MOVSD
            // store to $mem (the MOVSD is the faulting, atomic access).
4218 4261   enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4219 4262     { // Atomic long load
4220 4263       // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4221 4264       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4222 4265       emit_opcode(cbuf,0x0F);
4223 4266       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4224 4267       int base     = $src$$base;
4225 4268       int index    = $src$$index;
4226 4269       int scale    = $src$$scale;
4227 4270       int displace = $src$$disp;
4228 4271       bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4229 4272       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4230 4273     }
4231 4274     cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4232 4275     { // MOVSD $mem,$tmp ! atomic long store
4233 4276       emit_opcode(cbuf,0xF2);
4234 4277       emit_opcode(cbuf,0x0F);
4235 4278       emit_opcode(cbuf,0x11);
4236 4279       int base     = $mem$$base;
4237 4280       int index    = $mem$$index;
4238 4281       int scale    = $mem$$scale;
4239 4282       int displace = $mem$$disp;
4240 4283       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4241 4284       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4242 4285     }
4243 4286   %}
4244 4287
            // Atomically store a volatile long held in a GPR pair: assemble
            // the two 32-bit halves into $tmp (MOVD lo, MOVD hi, PUNPCKLDQ),
            // then perform a single 64-bit MOVSD store to $mem.
4245 4288   enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4246 4289     { // MOVD $tmp,$src.lo
4247 4290       emit_opcode(cbuf,0x66);
4248 4291       emit_opcode(cbuf,0x0F);
4249 4292       emit_opcode(cbuf,0x6E);
4250 4293       emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4251 4294     }
4252 4295     { // MOVD $tmp2,$src.hi
4253 4296       emit_opcode(cbuf,0x66);
4254 4297       emit_opcode(cbuf,0x0F);
4255 4298       emit_opcode(cbuf,0x6E);
4256 4299       emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4257 4300     }
4258 4301     { // PUNPCKLDQ $tmp,$tmp2
4259 4302       emit_opcode(cbuf,0x66);
4260 4303       emit_opcode(cbuf,0x0F);
4261 4304       emit_opcode(cbuf,0x62);
4262 4305       emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4263 4306     }
4264 4307     cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4265 4308     { // MOVSD $mem,$tmp ! atomic long store
4266 4309       emit_opcode(cbuf,0xF2);
4267 4310       emit_opcode(cbuf,0x0F);
4268 4311       emit_opcode(cbuf,0x11);
4269 4312       int base     = $mem$$base;
4270 4313       int index    = $mem$$index;
4271 4314       int scale    = $mem$$scale;
4272 4315       int displace = $mem$$disp;
4273 4316       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4317       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4318     }
4276 4319   %}
4277 4320
4278 4321   // Safepoint Poll.  This polls the safepoint page, and causes an
4279 4322   // exception if it is not readable. Unfortunately, it kills the condition code
4280 4323   // in the process
4281 4324   // We currently use TESTL [spp],EDI
4282 4325   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4283 4326 
4284 4327   enc_class Safepoint_Poll() %{
4285 4328     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0);  // poll reloc so the VM can identify this site
4286 4329     emit_opcode(cbuf,0x85);                        // TEST r32,[disp32]
4287 4330     emit_rm   (cbuf, 0x0, 0x7, 0x5);               // reg=EDI, mod=00 r/m=101 = absolute disp32
4288 4331     emit_d32(cbuf, (intptr_t)os::get_polling_page());
4289 4332   %}
4290 4333 %}
4291 4334
4292 4335
4293 4336 //----------FRAME--------------------------------------------------------------
4294 4337 // Definition of frame structure and management information.
4295 4338 //
4296 4339 // S T A C K L A Y O U T Allocators stack-slot number
4297 4340 // | (to get allocators register number
4298 4341 // G Owned by | | v add OptoReg::stack0())
4299 4342 // r CALLER | |
4300 4343 // o | +--------+ pad to even-align allocators stack-slot
4301 4344 // w V | pad0 | numbers; owned by CALLER
4302 4345 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4303 4346 // h ^ | in | 5
4304 4347 // | | args | 4 Holes in incoming args owned by SELF
4305 4348 // | | | | 3
4306 4349 // | | +--------+
4307 4350 // V | | old out| Empty on Intel, window on Sparc
4308 4351 // | old |preserve| Must be even aligned.
4309 4352 // | SP-+--------+----> Matcher::_old_SP, even aligned
4310 4353 // | | in | 3 area for Intel ret address
4311 4354 // Owned by |preserve| Empty on Sparc.
4312 4355 // SELF +--------+
4313 4356 // | | pad2 | 2 pad to align old SP
4314 4357 // | +--------+ 1
4315 4358 // | | locks | 0
4316 4359 // | +--------+----> OptoReg::stack0(), even aligned
4317 4360 // | | pad1 | 11 pad to align new SP
4318 4361 // | +--------+
4319 4362 // | | | 10
4320 4363 // | | spills | 9 spills
4321 4364 // V | | 8 (pad0 slot for callee)
4322 4365 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4323 4366 // ^ | out | 7
4324 4367 // | | args | 6 Holes in outgoing args owned by CALLEE
4325 4368 // Owned by +--------+
4326 4369 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4327 4370 // | new |preserve| Must be even-aligned.
4328 4371 // | SP-+--------+----> Matcher::_new_SP, even aligned
4329 4372 // | | |
4330 4373 //
4331 4374 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4332 4375 // known from SELF's arguments and the Java calling convention.
4333 4376 // Region 6-7 is determined per call site.
4334 4377 // Note 2: If the calling convention leaves holes in the incoming argument
4335 4378 // area, those holes are owned by SELF. Holes in the outgoing area
4336 4379 //       are owned by the CALLEE.  Holes should not be necessary in the
4337 4380 // incoming area, as the Java calling convention is completely under
4338 4381 // the control of the AD file. Doubles can be sorted and packed to
4339 4382 //       avoid holes.  Holes in the outgoing arguments may be necessary for
4340 4383 // varargs C calling conventions.
4341 4384 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4342 4385 // even aligned with pad0 as needed.
4343 4386 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4344 4387 // region 6-11 is even aligned; it may be padded out more so that
4345 4388 // the region from SP to FP meets the minimum stack alignment.
4346 4389
4347 4390 frame %{
4348 4391 // What direction does stack grow in (assumed to be same for C & Java)
4349 4392 stack_direction(TOWARDS_LOW);
4350 4393
4351 4394 // These three registers define part of the calling convention
4352 4395 // between compiled code and the interpreter.
4353 4396 inline_cache_reg(EAX); // Inline Cache Register
4354 4397 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4355 4398
4356 4399 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4357 4400 cisc_spilling_operand_name(indOffset32);
4358 4401
4359 4402 // Number of stack slots consumed by locking an object
4360 4403 sync_stack_slots(1);
4361 4404
4362 4405 // Compiled code's Frame Pointer
4363 4406 frame_pointer(ESP);
4364 4407 // Interpreter stores its frame pointer in a register which is
4365 4408 // stored to the stack by I2CAdaptors.
4366 4409 // I2CAdaptors convert from interpreted java to compiled java.
4367 4410 interpreter_frame_pointer(EBP);
4368 4411
4369 4412 // Stack alignment requirement
4370 4413 // Alignment size in bytes (128-bit -> 16 bytes)
4371 4414 stack_alignment(StackAlignmentInBytes);
4372 4415
4373 4416 // Number of stack slots between incoming argument block and the start of
4374 4417 // a new frame. The PROLOG must add this many slots to the stack. The
4375 4418 // EPILOG must remove this many slots. Intel needs one slot for
4376 4419 // return address and one for rbp, (must save rbp)
4377 4420 in_preserve_stack_slots(2+VerifyStackAtCalls);
4378 4421
4379 4422 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4380 4423 // for calls to C. Supports the var-args backing area for register parms.
4381 4424 varargs_C_out_slots_killed(0);
4382 4425
4383 4426 // The after-PROLOG location of the return address. Location of
4384 4427 // return address specifies a type (REG or STACK) and a number
4385 4428 // representing the register number (i.e. - use a register name) or
4386 4429 // stack slot.
4387 4430 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4388 4431 // Otherwise, it is above the locks and verification slot and alignment word
4389 4432 return_addr(STACK - 1 +
4390 4433 round_to(1+VerifyStackAtCalls+
4391 4434 Compile::current()->fixed_slots(),
4392 4435 (StackAlignmentInBytes/wordSize)));
4393 4436
4394 4437 // Body of function which returns an integer array locating
4395 4438 // arguments either in registers or in stack slots. Passed an array
4396 4439 // of ideal registers called "sig" and a "length" count. Stack-slot
4397 4440 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4398 4441 // arguments for a CALLEE. Incoming stack arguments are
4399 4442 // automatically biased by the preserve_stack_slots field above.
4400 4443 calling_convention %{
4401 4444 // No difference between ingoing/outgoing just pass false
4402 4445 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4403 4446 %}
4404 4447
4405 4448
4406 4449 // Body of function which returns an integer array locating
4407 4450 // arguments either in registers or in stack slots. Passed an array
4408 4451 // of ideal registers called "sig" and a "length" count. Stack-slot
4409 4452 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4410 4453 // arguments for a CALLEE. Incoming stack arguments are
4411 4454 // automatically biased by the preserve_stack_slots field above.
4412 4455 c_calling_convention %{
4413 4456 // This is obviously always outgoing
4414 4457 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4415 4458 %}
4416 4459
4417 4460 // Location of C & interpreter return values
4418 4461 c_return_value %{
4419 4462 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4420 4463 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4421 4464 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4422 4465
4423 4466 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4424 4467 // that C functions return float and double results in XMM0.
4425 4468 if( ideal_reg == Op_RegD && UseSSE>=2 )
4426 4469 return OptoRegPair(XMM0b_num,XMM0a_num);
4427 4470 if( ideal_reg == Op_RegF && UseSSE>=2 )
4428 4471 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4429 4472
4430 4473 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4431 4474 %}
4432 4475
4433 4476 // Location of return values
4434 4477 return_value %{
4435 4478 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4436 4479 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4437 4480 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4438 4481 if( ideal_reg == Op_RegD && UseSSE>=2 )
4439 4482 return OptoRegPair(XMM0b_num,XMM0a_num);
4440 4483 if( ideal_reg == Op_RegF && UseSSE>=1 )
4441 4484 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4442 4485 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4443 4486 %}
4444 4487
4445 4488 %}
4446 4489
4447 4490 //----------ATTRIBUTES---------------------------------------------------------
4448 4491 //----------Operand Attributes-------------------------------------------------
4449 4492 op_attrib op_cost(0); // Required cost attribute
4450 4493
4451 4494 //----------Instruction Attributes---------------------------------------------
4452 4495 ins_attrib ins_cost(100); // Required cost attribute
4453 4496 ins_attrib ins_size(8); // Required size attribute (in bits)
4454 4497 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4455 4498 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4456 4499 // non-matching short branch variant of some
4457 4500 // long branch?
4458 4501 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4459 4502 // specifies the alignment that some part of the instruction (not
4460 4503 // necessarily the start) requires. If > 1, a compute_padding()
4461 4504 // function must be provided for the instruction
4462 4505
4463 4506 //----------OPERANDS-----------------------------------------------------------
4464 4507 // Operand definitions must precede instruction definitions for correct parsing
4465 4508 // in the ADLC because operands constitute user defined types which are used in
4466 4509 // instruction definitions.
4467 4510
4468 4511 //----------Simple Operands----------------------------------------------------
4469 4512 // Immediate Operands
4470 4513 // Integer Immediate
4471 4514 operand immI() %{
4472 4515 match(ConI);
4473 4516
4474 4517 op_cost(10);
4475 4518 format %{ %}
4476 4519 interface(CONST_INTER);
4477 4520 %}
4478 4521
4479 4522 // Constant for test vs zero
4480 4523 operand immI0() %{
4481 4524 predicate(n->get_int() == 0);
4482 4525 match(ConI);
4483 4526
4484 4527 op_cost(0);
4485 4528 format %{ %}
4486 4529 interface(CONST_INTER);
4487 4530 %}
4488 4531
4489 4532 // Constant for increment
4490 4533 operand immI1() %{
4491 4534 predicate(n->get_int() == 1);
4492 4535 match(ConI);
4493 4536
4494 4537 op_cost(0);
4495 4538 format %{ %}
4496 4539 interface(CONST_INTER);
4497 4540 %}
4498 4541
4499 4542 // Constant for decrement
4500 4543 operand immI_M1() %{
4501 4544 predicate(n->get_int() == -1);
4502 4545 match(ConI);
4503 4546
4504 4547 op_cost(0);
4505 4548 format %{ %}
4506 4549 interface(CONST_INTER);
4507 4550 %}
4508 4551
4509 4552 // Valid scale values for addressing modes
4510 4553 operand immI2() %{
4511 4554 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4512 4555 match(ConI);
4513 4556
4514 4557 format %{ %}
4515 4558 interface(CONST_INTER);
4516 4559 %}
4517 4560
4518 4561 operand immI8() %{
4519 4562 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4520 4563 match(ConI);
4521 4564
4522 4565 op_cost(5);
4523 4566 format %{ %}
4524 4567 interface(CONST_INTER);
4525 4568 %}
4526 4569
4527 4570 operand immI16() %{
4528 4571 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4529 4572 match(ConI);
4530 4573
4531 4574 op_cost(10);
4532 4575 format %{ %}
4533 4576 interface(CONST_INTER);
4534 4577 %}
4535 4578
4536 4579 // Constant for long shifts
4537 4580 operand immI_32() %{
4538 4581 predicate( n->get_int() == 32 );
4539 4582 match(ConI);
4540 4583
4541 4584 op_cost(0);
4542 4585 format %{ %}
4543 4586 interface(CONST_INTER);
4544 4587 %}
4545 4588
4546 4589 operand immI_1_31() %{
4547 4590 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4548 4591 match(ConI);
4549 4592
4550 4593 op_cost(0);
4551 4594 format %{ %}
4552 4595 interface(CONST_INTER);
4553 4596 %}
4554 4597
4555 4598 operand immI_32_63() %{
4556 4599 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4557 4600 match(ConI);
4558 4601 op_cost(0);
4559 4602
4560 4603 format %{ %}
4561 4604 interface(CONST_INTER);
4562 4605 %}
4563 4606
4564 4607 operand immI_1() %{
4565 4608 predicate( n->get_int() == 1 );
4566 4609 match(ConI);
4567 4610
4568 4611 op_cost(0);
4569 4612 format %{ %}
4570 4613 interface(CONST_INTER);
4571 4614 %}
4572 4615
4573 4616 operand immI_2() %{
4574 4617 predicate( n->get_int() == 2 );
4575 4618 match(ConI);
4576 4619
4577 4620 op_cost(0);
4578 4621 format %{ %}
4579 4622 interface(CONST_INTER);
4580 4623 %}
4581 4624
4582 4625 operand immI_3() %{
4583 4626 predicate( n->get_int() == 3 );
4584 4627 match(ConI);
4585 4628
4586 4629 op_cost(0);
4587 4630 format %{ %}
4588 4631 interface(CONST_INTER);
4589 4632 %}
4590 4633
4591 4634 // Pointer Immediate
4592 4635 operand immP() %{
4593 4636 match(ConP);
4594 4637
4595 4638 op_cost(10);
4596 4639 format %{ %}
4597 4640 interface(CONST_INTER);
4598 4641 %}
4599 4642
4600 4643 // NULL Pointer Immediate
4601 4644 operand immP0() %{
4602 4645 predicate( n->get_ptr() == 0 );
4603 4646 match(ConP);
4604 4647 op_cost(0);
4605 4648
4606 4649 format %{ %}
4607 4650 interface(CONST_INTER);
4608 4651 %}
4609 4652
4610 4653 // Long Immediate
4611 4654 operand immL() %{
4612 4655 match(ConL);
4613 4656
4614 4657 op_cost(20);
4615 4658 format %{ %}
4616 4659 interface(CONST_INTER);
4617 4660 %}
4618 4661
4619 4662 // Long Immediate zero
4620 4663 operand immL0() %{
4621 4664 predicate( n->get_long() == 0L );
4622 4665 match(ConL);
4623 4666 op_cost(0);
4624 4667
4625 4668 format %{ %}
4626 4669 interface(CONST_INTER);
4627 4670 %}
4628 4671
4629 4672 // Long Immediate zero
4630 4673 operand immL_M1() %{
4631 4674 predicate( n->get_long() == -1L );
4632 4675 match(ConL);
4633 4676 op_cost(0);
4634 4677
4635 4678 format %{ %}
4636 4679 interface(CONST_INTER);
4637 4680 %}
4638 4681
4639 4682 // Long immediate from 0 to 127.
4640 4683 // Used for a shorter form of long mul by 10.
4641 4684 operand immL_127() %{
4642 4685 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4643 4686 match(ConL);
4644 4687 op_cost(0);
4645 4688
4646 4689 format %{ %}
4647 4690 interface(CONST_INTER);
4648 4691 %}
4649 4692
4650 4693 // Long Immediate: low 32-bit mask
4651 4694 operand immL_32bits() %{
4652 4695 predicate(n->get_long() == 0xFFFFFFFFL);
4653 4696 match(ConL);
4654 4697 op_cost(0);
4655 4698
4656 4699 format %{ %}
4657 4700 interface(CONST_INTER);
4658 4701 %}
4659 4702
4660 4703 // Long Immediate: low 32-bit mask
4661 4704 operand immL32() %{
4662 4705 predicate(n->get_long() == (int)(n->get_long()));
4663 4706 match(ConL);
4664 4707 op_cost(20);
4665 4708
4666 4709 format %{ %}
4667 4710 interface(CONST_INTER);
4668 4711 %}
4669 4712
4670 4713 //Double Immediate zero
4671 4714 operand immD0() %{
4672 4715 // Do additional (and counter-intuitive) test against NaN to work around VC++
4673 4716 // bug that generates code such that NaNs compare equal to 0.0
4674 4717 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4675 4718 match(ConD);
4676 4719
4677 4720 op_cost(5);
4678 4721 format %{ %}
4679 4722 interface(CONST_INTER);
4680 4723 %}
4681 4724
4682 4725 // Double Immediate
4683 4726 operand immD1() %{
4684 4727 predicate( UseSSE<=1 && n->getd() == 1.0 );
4685 4728 match(ConD);
4686 4729
4687 4730 op_cost(5);
4688 4731 format %{ %}
4689 4732 interface(CONST_INTER);
4690 4733 %}
4691 4734
4692 4735 // Double Immediate
4693 4736 operand immD() %{
4694 4737 predicate(UseSSE<=1);
4695 4738 match(ConD);
4696 4739
4697 4740 op_cost(5);
4698 4741 format %{ %}
4699 4742 interface(CONST_INTER);
4700 4743 %}
4701 4744
4702 4745 operand immXD() %{
4703 4746 predicate(UseSSE>=2);
4704 4747 match(ConD);
4705 4748
4706 4749 op_cost(5);
4707 4750 format %{ %}
4708 4751 interface(CONST_INTER);
4709 4752 %}
4710 4753
4711 4754 // Double Immediate zero
4712 4755 operand immXD0() %{
4713 4756 // Do additional (and counter-intuitive) test against NaN to work around VC++
4714 4757 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4715 4758 // compare equal to -0.0.
4716 4759 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4717 4760 match(ConD);
4718 4761
4719 4762 format %{ %}
4720 4763 interface(CONST_INTER);
4721 4764 %}
4722 4765
4723 4766 // Float Immediate zero
4724 4767 operand immF0() %{
4725 4768 predicate( UseSSE == 0 && n->getf() == 0.0 );
4726 4769 match(ConF);
4727 4770
4728 4771 op_cost(5);
4729 4772 format %{ %}
4730 4773 interface(CONST_INTER);
4731 4774 %}
4732 4775
4733 4776 // Float Immediate
4734 4777 operand immF() %{
4735 4778 predicate( UseSSE == 0 );
4736 4779 match(ConF);
4737 4780
4738 4781 op_cost(5);
4739 4782 format %{ %}
4740 4783 interface(CONST_INTER);
4741 4784 %}
4742 4785
4743 4786 // Float Immediate
4744 4787 operand immXF() %{
4745 4788 predicate(UseSSE >= 1);
4746 4789 match(ConF);
4747 4790
4748 4791 op_cost(5);
4749 4792 format %{ %}
4750 4793 interface(CONST_INTER);
4751 4794 %}
4752 4795
4753 4796 // Float Immediate zero. Zero and not -0.0
4754 4797 operand immXF0() %{
4755 4798 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4756 4799 match(ConF);
4757 4800
4758 4801 op_cost(5);
4759 4802 format %{ %}
4760 4803 interface(CONST_INTER);
4761 4804 %}
4762 4805
4763 4806 // Immediates for special shifts (sign extend)
4764 4807
4765 4808 // Constants for increment
4766 4809 operand immI_16() %{
4767 4810 predicate( n->get_int() == 16 );
4768 4811 match(ConI);
4769 4812
4770 4813 format %{ %}
4771 4814 interface(CONST_INTER);
4772 4815 %}
4773 4816
4774 4817 operand immI_24() %{
4775 4818 predicate( n->get_int() == 24 );
4776 4819 match(ConI);
4777 4820
4778 4821 format %{ %}
4779 4822 interface(CONST_INTER);
4780 4823 %}
4781 4824
4782 4825 // Constant for byte-wide masking
4783 4826 operand immI_255() %{
4784 4827 predicate( n->get_int() == 255 );
4785 4828 match(ConI);
4786 4829
4787 4830 format %{ %}
4788 4831 interface(CONST_INTER);
4789 4832 %}
4790 4833
4791 4834 // Constant for short-wide masking
4792 4835 operand immI_65535() %{
4793 4836 predicate(n->get_int() == 65535);
4794 4837 match(ConI);
4795 4838
4796 4839 format %{ %}
4797 4840 interface(CONST_INTER);
4798 4841 %}
4799 4842
4800 4843 // Register Operands
4801 4844 // Integer Register
4802 4845 operand eRegI() %{
4803 4846 constraint(ALLOC_IN_RC(e_reg));
4804 4847 match(RegI);
4805 4848 match(xRegI);
4806 4849 match(eAXRegI);
4807 4850 match(eBXRegI);
4808 4851 match(eCXRegI);
4809 4852 match(eDXRegI);
4810 4853 match(eDIRegI);
4811 4854 match(eSIRegI);
4812 4855
4813 4856 format %{ %}
4814 4857 interface(REG_INTER);
4815 4858 %}
4816 4859
4817 4860 // Subset of Integer Register
4818 4861 operand xRegI(eRegI reg) %{
4819 4862 constraint(ALLOC_IN_RC(x_reg));
4820 4863 match(reg);
4821 4864 match(eAXRegI);
4822 4865 match(eBXRegI);
4823 4866 match(eCXRegI);
4824 4867 match(eDXRegI);
4825 4868
4826 4869 format %{ %}
4827 4870 interface(REG_INTER);
4828 4871 %}
4829 4872
4830 4873 // Special Registers
4831 4874 operand eAXRegI(xRegI reg) %{
4832 4875 constraint(ALLOC_IN_RC(eax_reg));
4833 4876 match(reg);
4834 4877 match(eRegI);
4835 4878
4836 4879 format %{ "EAX" %}
4837 4880 interface(REG_INTER);
4838 4881 %}
4839 4882
4840 4883 // Special Registers
4841 4884 operand eBXRegI(xRegI reg) %{
4842 4885 constraint(ALLOC_IN_RC(ebx_reg));
4843 4886 match(reg);
4844 4887 match(eRegI);
4845 4888
4846 4889 format %{ "EBX" %}
4847 4890 interface(REG_INTER);
4848 4891 %}
4849 4892
4850 4893 operand eCXRegI(xRegI reg) %{
4851 4894 constraint(ALLOC_IN_RC(ecx_reg));
4852 4895 match(reg);
4853 4896 match(eRegI);
4854 4897
4855 4898 format %{ "ECX" %}
4856 4899 interface(REG_INTER);
4857 4900 %}
4858 4901
4859 4902 operand eDXRegI(xRegI reg) %{
4860 4903 constraint(ALLOC_IN_RC(edx_reg));
4861 4904 match(reg);
4862 4905 match(eRegI);
4863 4906
4864 4907 format %{ "EDX" %}
4865 4908 interface(REG_INTER);
4866 4909 %}
4867 4910
4868 4911 operand eDIRegI(xRegI reg) %{
4869 4912 constraint(ALLOC_IN_RC(edi_reg));
4870 4913 match(reg);
4871 4914 match(eRegI);
4872 4915
4873 4916 format %{ "EDI" %}
4874 4917 interface(REG_INTER);
4875 4918 %}
4876 4919
4877 4920 operand naxRegI() %{
4878 4921 constraint(ALLOC_IN_RC(nax_reg));
4879 4922 match(RegI);
4880 4923 match(eCXRegI);
4881 4924 match(eDXRegI);
4882 4925 match(eSIRegI);
4883 4926 match(eDIRegI);
4884 4927
4885 4928 format %{ %}
4886 4929 interface(REG_INTER);
4887 4930 %}
4888 4931
4889 4932 operand nadxRegI() %{
4890 4933 constraint(ALLOC_IN_RC(nadx_reg));
4891 4934 match(RegI);
4892 4935 match(eBXRegI);
4893 4936 match(eCXRegI);
4894 4937 match(eSIRegI);
4895 4938 match(eDIRegI);
4896 4939
4897 4940 format %{ %}
4898 4941 interface(REG_INTER);
4899 4942 %}
4900 4943
4901 4944 operand ncxRegI() %{
4902 4945 constraint(ALLOC_IN_RC(ncx_reg));
4903 4946 match(RegI);
4904 4947 match(eAXRegI);
4905 4948 match(eDXRegI);
4906 4949 match(eSIRegI);
4907 4950 match(eDIRegI);
4908 4951
4909 4952 format %{ %}
4910 4953 interface(REG_INTER);
4911 4954 %}
4912 4955
4913 4956 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
4914 4957 // //
4915 4958 operand eSIRegI(xRegI reg) %{
4916 4959 constraint(ALLOC_IN_RC(esi_reg));
4917 4960 match(reg);
4918 4961 match(eRegI);
4919 4962
4920 4963 format %{ "ESI" %}
4921 4964 interface(REG_INTER);
4922 4965 %}
4923 4966
4924 4967 // Pointer Register
4925 4968 operand anyRegP() %{
4926 4969 constraint(ALLOC_IN_RC(any_reg));
4927 4970 match(RegP);
4928 4971 match(eAXRegP);
4929 4972 match(eBXRegP);
4930 4973 match(eCXRegP);
4931 4974 match(eDIRegP);
4932 4975 match(eRegP);
4933 4976
4934 4977 format %{ %}
4935 4978 interface(REG_INTER);
4936 4979 %}
4937 4980
4938 4981 operand eRegP() %{
4939 4982 constraint(ALLOC_IN_RC(e_reg));
4940 4983 match(RegP);
4941 4984 match(eAXRegP);
4942 4985 match(eBXRegP);
4943 4986 match(eCXRegP);
4944 4987 match(eDIRegP);
4945 4988
4946 4989 format %{ %}
4947 4990 interface(REG_INTER);
4948 4991 %}
4949 4992
4950 4993 // On windows95, EBP is not safe to use for implicit null tests.
4951 4994 operand eRegP_no_EBP() %{
4952 4995 constraint(ALLOC_IN_RC(e_reg_no_rbp));
4953 4996 match(RegP);
4954 4997 match(eAXRegP);
4955 4998 match(eBXRegP);
4956 4999 match(eCXRegP);
4957 5000 match(eDIRegP);
4958 5001
4959 5002 op_cost(100);
4960 5003 format %{ %}
4961 5004 interface(REG_INTER);
4962 5005 %}
4963 5006
4964 5007 operand naxRegP() %{
4965 5008 constraint(ALLOC_IN_RC(nax_reg));
4966 5009 match(RegP);
4967 5010 match(eBXRegP);
4968 5011 match(eDXRegP);
4969 5012 match(eCXRegP);
4970 5013 match(eSIRegP);
4971 5014 match(eDIRegP);
4972 5015
4973 5016 format %{ %}
4974 5017 interface(REG_INTER);
4975 5018 %}
4976 5019
4977 5020 operand nabxRegP() %{
4978 5021 constraint(ALLOC_IN_RC(nabx_reg));
4979 5022 match(RegP);
4980 5023 match(eCXRegP);
4981 5024 match(eDXRegP);
4982 5025 match(eSIRegP);
4983 5026 match(eDIRegP);
4984 5027
4985 5028 format %{ %}
4986 5029 interface(REG_INTER);
4987 5030 %}
4988 5031
4989 5032 operand pRegP() %{
4990 5033 constraint(ALLOC_IN_RC(p_reg));
4991 5034 match(RegP);
4992 5035 match(eBXRegP);
4993 5036 match(eDXRegP);
4994 5037 match(eSIRegP);
4995 5038 match(eDIRegP);
4996 5039
4997 5040 format %{ %}
4998 5041 interface(REG_INTER);
4999 5042 %}
5000 5043
5001 5044 // Special Registers
5002 5045 // Return a pointer value
5003 5046 operand eAXRegP(eRegP reg) %{
5004 5047 constraint(ALLOC_IN_RC(eax_reg));
5005 5048 match(reg);
5006 5049 format %{ "EAX" %}
5007 5050 interface(REG_INTER);
5008 5051 %}
5009 5052
5010 5053 // Used in AtomicAdd
5011 5054 operand eBXRegP(eRegP reg) %{
5012 5055 constraint(ALLOC_IN_RC(ebx_reg));
5013 5056 match(reg);
5014 5057 format %{ "EBX" %}
5015 5058 interface(REG_INTER);
5016 5059 %}
5017 5060
5018 5061 // Tail-call (interprocedural jump) to interpreter
5019 5062 operand eCXRegP(eRegP reg) %{
5020 5063 constraint(ALLOC_IN_RC(ecx_reg));
5021 5064 match(reg);
5022 5065 format %{ "ECX" %}
5023 5066 interface(REG_INTER);
5024 5067 %}
5025 5068
5026 5069 operand eSIRegP(eRegP reg) %{
5027 5070 constraint(ALLOC_IN_RC(esi_reg));
5028 5071 match(reg);
5029 5072 format %{ "ESI" %}
5030 5073 interface(REG_INTER);
5031 5074 %}
5032 5075
5033 5076 // Used in rep stosw
5034 5077 operand eDIRegP(eRegP reg) %{
5035 5078 constraint(ALLOC_IN_RC(edi_reg));
5036 5079 match(reg);
5037 5080 format %{ "EDI" %}
5038 5081 interface(REG_INTER);
5039 5082 %}
5040 5083
5041 5084 operand eBPRegP() %{
5042 5085 constraint(ALLOC_IN_RC(ebp_reg));
5043 5086 match(RegP);
5044 5087 format %{ "EBP" %}
5045 5088 interface(REG_INTER);
5046 5089 %}
5047 5090
5048 5091 operand eRegL() %{
5049 5092 constraint(ALLOC_IN_RC(long_reg));
5050 5093 match(RegL);
5051 5094 match(eADXRegL);
5052 5095
5053 5096 format %{ %}
5054 5097 interface(REG_INTER);
5055 5098 %}
5056 5099
5057 5100 operand eADXRegL( eRegL reg ) %{
5058 5101 constraint(ALLOC_IN_RC(eadx_reg));
5059 5102 match(reg);
5060 5103
5061 5104 format %{ "EDX:EAX" %}
5062 5105 interface(REG_INTER);
5063 5106 %}
5064 5107
5065 5108 operand eBCXRegL( eRegL reg ) %{
5066 5109 constraint(ALLOC_IN_RC(ebcx_reg));
5067 5110 match(reg);
5068 5111
5069 5112 format %{ "EBX:ECX" %}
5070 5113 interface(REG_INTER);
5071 5114 %}
5072 5115
5073 5116 // Special case for integer high multiply
5074 5117 operand eADXRegL_low_only() %{
5075 5118 constraint(ALLOC_IN_RC(eadx_reg));
5076 5119 match(RegL);
5077 5120
5078 5121 format %{ "EAX" %}
5079 5122 interface(REG_INTER);
5080 5123 %}
5081 5124
5082 5125 // Flags register, used as output of compare instructions
5083 5126 operand eFlagsReg() %{
5084 5127 constraint(ALLOC_IN_RC(int_flags));
5085 5128 match(RegFlags);
5086 5129
5087 5130 format %{ "EFLAGS" %}
5088 5131 interface(REG_INTER);
5089 5132 %}
5090 5133
5091 5134 // Flags register, used as output of FLOATING POINT compare instructions
5092 5135 operand eFlagsRegU() %{
5093 5136 constraint(ALLOC_IN_RC(int_flags));
5094 5137 match(RegFlags);
5095 5138
5096 5139 format %{ "EFLAGS_U" %}
5097 5140 interface(REG_INTER);
5098 5141 %}
5099 5142
5100 5143 operand eFlagsRegUCF() %{
5101 5144 constraint(ALLOC_IN_RC(int_flags));
5102 5145 match(RegFlags);
5103 5146 predicate(false);
5104 5147
5105 5148 format %{ "EFLAGS_U_CF" %}
5106 5149 interface(REG_INTER);
5107 5150 %}
5108 5151
5109 5152 // Condition Code Register used by long compare
5110 5153 operand flagsReg_long_LTGE() %{
5111 5154 constraint(ALLOC_IN_RC(int_flags));
5112 5155 match(RegFlags);
5113 5156 format %{ "FLAGS_LTGE" %}
5114 5157 interface(REG_INTER);
5115 5158 %}
5116 5159 operand flagsReg_long_EQNE() %{
5117 5160 constraint(ALLOC_IN_RC(int_flags));
5118 5161 match(RegFlags);
5119 5162 format %{ "FLAGS_EQNE" %}
5120 5163 interface(REG_INTER);
5121 5164 %}
5122 5165 operand flagsReg_long_LEGT() %{
5123 5166 constraint(ALLOC_IN_RC(int_flags));
5124 5167 match(RegFlags);
5125 5168 format %{ "FLAGS_LEGT" %}
5126 5169 interface(REG_INTER);
5127 5170 %}
5128 5171
5129 5172 // Float register operands
5130 5173 operand regD() %{
5131 5174 predicate( UseSSE < 2 );
5132 5175 constraint(ALLOC_IN_RC(dbl_reg));
5133 5176 match(RegD);
5134 5177 match(regDPR1);
5135 5178 match(regDPR2);
5136 5179 format %{ %}
5137 5180 interface(REG_INTER);
5138 5181 %}
5139 5182
5140 5183 operand regDPR1(regD reg) %{
5141 5184 predicate( UseSSE < 2 );
5142 5185 constraint(ALLOC_IN_RC(dbl_reg0));
5143 5186 match(reg);
5144 5187 format %{ "FPR1" %}
5145 5188 interface(REG_INTER);
5146 5189 %}
5147 5190
5148 5191 operand regDPR2(regD reg) %{
5149 5192 predicate( UseSSE < 2 );
5150 5193 constraint(ALLOC_IN_RC(dbl_reg1));
5151 5194 match(reg);
5152 5195 format %{ "FPR2" %}
5153 5196 interface(REG_INTER);
5154 5197 %}
5155 5198
5156 5199 operand regnotDPR1(regD reg) %{
5157 5200 predicate( UseSSE < 2 );
5158 5201 constraint(ALLOC_IN_RC(dbl_notreg0));
5159 5202 match(reg);
5160 5203 format %{ %}
5161 5204 interface(REG_INTER);
5162 5205 %}
5163 5206
5164 5207 // XMM Double register operands
5165 5208 operand regXD() %{
5166 5209 predicate( UseSSE>=2 );
5167 5210 constraint(ALLOC_IN_RC(xdb_reg));
5168 5211 match(RegD);
5169 5212 match(regXD6);
5170 5213 match(regXD7);
5171 5214 format %{ %}
5172 5215 interface(REG_INTER);
5173 5216 %}
5174 5217
5175 5218 // XMM6 double register operands
5176 5219 operand regXD6(regXD reg) %{
5177 5220 predicate( UseSSE>=2 );
5178 5221 constraint(ALLOC_IN_RC(xdb_reg6));
5179 5222 match(reg);
5180 5223 format %{ "XMM6" %}
5181 5224 interface(REG_INTER);
5182 5225 %}
5183 5226
5184 5227 // XMM7 double register operands
5185 5228 operand regXD7(regXD reg) %{
5186 5229 predicate( UseSSE>=2 );
5187 5230 constraint(ALLOC_IN_RC(xdb_reg7));
5188 5231 match(reg);
5189 5232 format %{ "XMM7" %}
5190 5233 interface(REG_INTER);
5191 5234 %}
5192 5235
5193 5236 // Float register operands
5194 5237 operand regF() %{
5195 5238 predicate( UseSSE < 2 );
5196 5239 constraint(ALLOC_IN_RC(flt_reg));
5197 5240 match(RegF);
5198 5241 match(regFPR1);
5199 5242 format %{ %}
5200 5243 interface(REG_INTER);
5201 5244 %}
5202 5245
5203 5246 // Float register operands
5204 5247 operand regFPR1(regF reg) %{
5205 5248 predicate( UseSSE < 2 );
5206 5249 constraint(ALLOC_IN_RC(flt_reg0));
5207 5250 match(reg);
5208 5251 format %{ "FPR1" %}
5209 5252 interface(REG_INTER);
5210 5253 %}
5211 5254
5212 5255 // XMM register operands
5213 5256 operand regX() %{
5214 5257 predicate( UseSSE>=1 );
5215 5258 constraint(ALLOC_IN_RC(xmm_reg));
5216 5259 match(RegF);
5217 5260 format %{ %}
5218 5261 interface(REG_INTER);
5219 5262 %}
5220 5263
5221 5264
5222 5265 //----------Memory Operands----------------------------------------------------
5223 5266 // Direct Memory Operand
5224 5267 operand direct(immP addr) %{
5225 5268 match(addr);
5226 5269
5227 5270 format %{ "[$addr]" %}
5228 5271 interface(MEMORY_INTER) %{
5229 5272 base(0xFFFFFFFF);
5230 5273 index(0x4);
5231 5274 scale(0x0);
5232 5275 disp($addr);
5233 5276 %}
5234 5277 %}
5235 5278
5236 5279 // Indirect Memory Operand
5237 5280 operand indirect(eRegP reg) %{
5238 5281 constraint(ALLOC_IN_RC(e_reg));
5239 5282 match(reg);
5240 5283
5241 5284 format %{ "[$reg]" %}
5242 5285 interface(MEMORY_INTER) %{
5243 5286 base($reg);
5244 5287 index(0x4);
5245 5288 scale(0x0);
5246 5289 disp(0x0);
5247 5290 %}
5248 5291 %}
5249 5292
5250 5293 // Indirect Memory Plus Short Offset Operand
5251 5294 operand indOffset8(eRegP reg, immI8 off) %{
5252 5295 match(AddP reg off);
5253 5296
5254 5297 format %{ "[$reg + $off]" %}
5255 5298 interface(MEMORY_INTER) %{
5256 5299 base($reg);
5257 5300 index(0x4);
5258 5301 scale(0x0);
5259 5302 disp($off);
5260 5303 %}
5261 5304 %}
5262 5305
5263 5306 // Indirect Memory Plus Long Offset Operand
5264 5307 operand indOffset32(eRegP reg, immI off) %{
5265 5308 match(AddP reg off);
5266 5309
5267 5310 format %{ "[$reg + $off]" %}
5268 5311 interface(MEMORY_INTER) %{
5269 5312 base($reg);
5270 5313 index(0x4);
5271 5314 scale(0x0);
5272 5315 disp($off);
5273 5316 %}
5274 5317 %}
5275 5318
5276 5319 // Indirect Memory Plus Long Offset Operand
5277 5320 operand indOffset32X(eRegI reg, immP off) %{
5278 5321 match(AddP off reg);
5279 5322
5280 5323 format %{ "[$reg + $off]" %}
5281 5324 interface(MEMORY_INTER) %{
5282 5325 base($reg);
5283 5326 index(0x4);
5284 5327 scale(0x0);
5285 5328 disp($off);
5286 5329 %}
5287 5330 %}
5288 5331
5289 5332 // Indirect Memory Plus Index Register Plus Offset Operand
5290 5333 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5291 5334 match(AddP (AddP reg ireg) off);
5292 5335
5293 5336 op_cost(10);
5294 5337 format %{"[$reg + $off + $ireg]" %}
5295 5338 interface(MEMORY_INTER) %{
5296 5339 base($reg);
5297 5340 index($ireg);
5298 5341 scale(0x0);
5299 5342 disp($off);
5300 5343 %}
5301 5344 %}
5302 5345
5303 5346 // Indirect Memory Plus Index Register Plus Offset Operand
5304 5347 operand indIndex(eRegP reg, eRegI ireg) %{
5305 5348 match(AddP reg ireg);
5306 5349
5307 5350 op_cost(10);
5308 5351 format %{"[$reg + $ireg]" %}
5309 5352 interface(MEMORY_INTER) %{
5310 5353 base($reg);
5311 5354 index($ireg);
5312 5355 scale(0x0);
5313 5356 disp(0x0);
5314 5357 %}
5315 5358 %}
5316 5359
5317 5360 // // -------------------------------------------------------------------------
5318 5361 // // 486 architecture doesn't support "scale * index + offset" with out a base
5319 5362 // // -------------------------------------------------------------------------
5320 5363 // // Scaled Memory Operands
5321 5364 // // Indirect Memory Times Scale Plus Offset Operand
5322 5365 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5323 5366 // match(AddP off (LShiftI ireg scale));
5324 5367 //
5325 5368 // op_cost(10);
5326 5369 // format %{"[$off + $ireg << $scale]" %}
5327 5370 // interface(MEMORY_INTER) %{
5328 5371 // base(0x4);
5329 5372 // index($ireg);
5330 5373 // scale($scale);
5331 5374 // disp($off);
5332 5375 // %}
5333 5376 // %}
5334 5377
5335 5378 // Indirect Memory Times Scale Plus Index Register
5336 5379 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5337 5380 match(AddP reg (LShiftI ireg scale));
5338 5381
5339 5382 op_cost(10);
5340 5383 format %{"[$reg + $ireg << $scale]" %}
5341 5384 interface(MEMORY_INTER) %{
5342 5385 base($reg);
5343 5386 index($ireg);
5344 5387 scale($scale);
5345 5388 disp(0x0);
5346 5389 %}
5347 5390 %}
5348 5391
5349 5392 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5350 5393 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5351 5394 match(AddP (AddP reg (LShiftI ireg scale)) off);
5352 5395
5353 5396 op_cost(10);
5354 5397 format %{"[$reg + $off + $ireg << $scale]" %}
5355 5398 interface(MEMORY_INTER) %{
5356 5399 base($reg);
5357 5400 index($ireg);
5358 5401 scale($scale);
5359 5402 disp($off);
5360 5403 %}
5361 5404 %}
5362 5405
5363 5406 //----------Load Long Memory Operands------------------------------------------
5364 5407 // The load-long idiom will use it's address expression again after loading
5365 5408 // the first word of the long. If the load-long destination overlaps with
5366 5409 // registers used in the addressing expression, the 2nd half will be loaded
5367 5410 // from a clobbered address. Fix this by requiring that load-long use
5368 5411 // address registers that do not overlap with the load-long target.
5369 5412
5370 5413 // load-long support
5371 5414 operand load_long_RegP() %{
5372 5415 constraint(ALLOC_IN_RC(esi_reg));
5373 5416 match(RegP);
5374 5417 match(eSIRegP);
5375 5418 op_cost(100);
5376 5419 format %{ %}
5377 5420 interface(REG_INTER);
5378 5421 %}
5379 5422
5380 5423 // Indirect Memory Operand Long
5381 5424 operand load_long_indirect(load_long_RegP reg) %{
5382 5425 constraint(ALLOC_IN_RC(esi_reg));
5383 5426 match(reg);
5384 5427
5385 5428 format %{ "[$reg]" %}
5386 5429 interface(MEMORY_INTER) %{
5387 5430 base($reg);
5388 5431 index(0x4);
5389 5432 scale(0x0);
5390 5433 disp(0x0);
5391 5434 %}
5392 5435 %}
5393 5436
5394 5437 // Indirect Memory Plus Long Offset Operand
5395 5438 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5396 5439 match(AddP reg off);
5397 5440
5398 5441 format %{ "[$reg + $off]" %}
5399 5442 interface(MEMORY_INTER) %{
5400 5443 base($reg);
5401 5444 index(0x4);
5402 5445 scale(0x0);
5403 5446 disp($off);
5404 5447 %}
5405 5448 %}
5406 5449
5407 5450 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5408 5451
5409 5452
5410 5453 //----------Special Memory Operands--------------------------------------------
5411 5454 // Stack Slot Operand - This operand is used for loading and storing temporary
5412 5455 // values on the stack where a match requires a value to
5413 5456 // flow through memory.
5414 5457 operand stackSlotP(sRegP reg) %{
5415 5458 constraint(ALLOC_IN_RC(stack_slots));
5416 5459 // No match rule because this operand is only generated in matching
5417 5460 format %{ "[$reg]" %}
5418 5461 interface(MEMORY_INTER) %{
5419 5462 base(0x4); // ESP
5420 5463 index(0x4); // No Index
5421 5464 scale(0x0); // No Scale
5422 5465 disp($reg); // Stack Offset
5423 5466 %}
5424 5467 %}
5425 5468
5426 5469 operand stackSlotI(sRegI reg) %{
5427 5470 constraint(ALLOC_IN_RC(stack_slots));
5428 5471 // No match rule because this operand is only generated in matching
5429 5472 format %{ "[$reg]" %}
5430 5473 interface(MEMORY_INTER) %{
5431 5474 base(0x4); // ESP
5432 5475 index(0x4); // No Index
5433 5476 scale(0x0); // No Scale
5434 5477 disp($reg); // Stack Offset
5435 5478 %}
5436 5479 %}
5437 5480
5438 5481 operand stackSlotF(sRegF reg) %{
5439 5482 constraint(ALLOC_IN_RC(stack_slots));
5440 5483 // No match rule because this operand is only generated in matching
5441 5484 format %{ "[$reg]" %}
5442 5485 interface(MEMORY_INTER) %{
5443 5486 base(0x4); // ESP
5444 5487 index(0x4); // No Index
5445 5488 scale(0x0); // No Scale
5446 5489 disp($reg); // Stack Offset
5447 5490 %}
5448 5491 %}
5449 5492
5450 5493 operand stackSlotD(sRegD reg) %{
5451 5494 constraint(ALLOC_IN_RC(stack_slots));
5452 5495 // No match rule because this operand is only generated in matching
5453 5496 format %{ "[$reg]" %}
5454 5497 interface(MEMORY_INTER) %{
5455 5498 base(0x4); // ESP
5456 5499 index(0x4); // No Index
5457 5500 scale(0x0); // No Scale
5458 5501 disp($reg); // Stack Offset
5459 5502 %}
5460 5503 %}
5461 5504
5462 5505 operand stackSlotL(sRegL reg) %{
5463 5506 constraint(ALLOC_IN_RC(stack_slots));
5464 5507 // No match rule because this operand is only generated in matching
5465 5508 format %{ "[$reg]" %}
5466 5509 interface(MEMORY_INTER) %{
5467 5510 base(0x4); // ESP
5468 5511 index(0x4); // No Index
5469 5512 scale(0x0); // No Scale
5470 5513 disp($reg); // Stack Offset
5471 5514 %}
5472 5515 %}
5473 5516
5474 5517 //----------Memory Operands - Win95 Implicit Null Variants----------------
5475 5518 // Indirect Memory Operand
5476 5519 operand indirect_win95_safe(eRegP_no_EBP reg)
5477 5520 %{
5478 5521 constraint(ALLOC_IN_RC(e_reg));
5479 5522 match(reg);
5480 5523
5481 5524 op_cost(100);
5482 5525 format %{ "[$reg]" %}
5483 5526 interface(MEMORY_INTER) %{
5484 5527 base($reg);
5485 5528 index(0x4);
5486 5529 scale(0x0);
5487 5530 disp(0x0);
5488 5531 %}
5489 5532 %}
5490 5533
5491 5534 // Indirect Memory Plus Short Offset Operand
5492 5535 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5493 5536 %{
5494 5537 match(AddP reg off);
5495 5538
5496 5539 op_cost(100);
5497 5540 format %{ "[$reg + $off]" %}
5498 5541 interface(MEMORY_INTER) %{
5499 5542 base($reg);
5500 5543 index(0x4);
5501 5544 scale(0x0);
5502 5545 disp($off);
5503 5546 %}
5504 5547 %}
5505 5548
5506 5549 // Indirect Memory Plus Long Offset Operand
5507 5550 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5508 5551 %{
5509 5552 match(AddP reg off);
5510 5553
5511 5554 op_cost(100);
5512 5555 format %{ "[$reg + $off]" %}
5513 5556 interface(MEMORY_INTER) %{
5514 5557 base($reg);
5515 5558 index(0x4);
5516 5559 scale(0x0);
5517 5560 disp($off);
5518 5561 %}
5519 5562 %}
5520 5563
5521 5564 // Indirect Memory Plus Index Register Plus Offset Operand
5522 5565 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5523 5566 %{
5524 5567 match(AddP (AddP reg ireg) off);
5525 5568
5526 5569 op_cost(100);
5527 5570 format %{"[$reg + $off + $ireg]" %}
5528 5571 interface(MEMORY_INTER) %{
5529 5572 base($reg);
5530 5573 index($ireg);
5531 5574 scale(0x0);
5532 5575 disp($off);
5533 5576 %}
5534 5577 %}
5535 5578
5536 5579 // Indirect Memory Times Scale Plus Index Register
5537 5580 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5538 5581 %{
5539 5582 match(AddP reg (LShiftI ireg scale));
5540 5583
5541 5584 op_cost(100);
5542 5585 format %{"[$reg + $ireg << $scale]" %}
5543 5586 interface(MEMORY_INTER) %{
5544 5587 base($reg);
5545 5588 index($ireg);
5546 5589 scale($scale);
5547 5590 disp(0x0);
5548 5591 %}
5549 5592 %}
5550 5593
5551 5594 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5552 5595 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5553 5596 %{
5554 5597 match(AddP (AddP reg (LShiftI ireg scale)) off);
5555 5598
5556 5599 op_cost(100);
5557 5600 format %{"[$reg + $off + $ireg << $scale]" %}
5558 5601 interface(MEMORY_INTER) %{
5559 5602 base($reg);
5560 5603 index($ireg);
5561 5604 scale($scale);
5562 5605 disp($off);
5563 5606 %}
5564 5607 %}
5565 5608
5566 5609 //----------Conditional Branch Operands----------------------------------------
5567 5610 // Comparison Op - This is the operation of the comparison, and is limited to
5568 5611 // the following set of codes:
5569 5612 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5570 5613 //
5571 5614 // Other attributes of the comparison, such as unsignedness, are specified
5572 5615 // by the comparison instruction that sets a condition code flags register.
5573 5616 // That result is represented by a flags operand whose subtype is appropriate
5574 5617 // to the unsignedness (etc.) of the comparison.
5575 5618 //
5576 5619 // Later, the instruction which matches both the Comparison Op (a Bool) and
5577 5620 // the flags (produced by the Cmp) specifies the coding of the comparison op
5578 5621 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5579 5622
// Comparison Code
5581 5624 operand cmpOp() %{
5582 5625 match(Bool);
5583 5626
5584 5627 format %{ "" %}
5585 5628 interface(COND_INTER) %{
5586 5629 equal(0x4, "e");
5587 5630 not_equal(0x5, "ne");
5588 5631 less(0xC, "l");
5589 5632 greater_equal(0xD, "ge");
5590 5633 less_equal(0xE, "le");
5591 5634 greater(0xF, "g");
5592 5635 %}
5593 5636 %}
5594 5637
5595 5638 // Comparison Code, unsigned compare. Used by FP also, with
5596 5639 // C2 (unordered) turned into GT or LT already. The other bits
5597 5640 // C0 and C3 are turned into Carry & Zero flags.
5598 5641 operand cmpOpU() %{
5599 5642 match(Bool);
5600 5643
5601 5644 format %{ "" %}
5602 5645 interface(COND_INTER) %{
5603 5646 equal(0x4, "e");
5604 5647 not_equal(0x5, "ne");
5605 5648 less(0x2, "b");
5606 5649 greater_equal(0x3, "nb");
5607 5650 less_equal(0x6, "be");
5608 5651 greater(0x7, "nbe");
5609 5652 %}
5610 5653 %}
5611 5654
5612 5655 // Floating comparisons that don't require any fixup for the unordered case
5613 5656 operand cmpOpUCF() %{
5614 5657 match(Bool);
5615 5658 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5616 5659 n->as_Bool()->_test._test == BoolTest::ge ||
5617 5660 n->as_Bool()->_test._test == BoolTest::le ||
5618 5661 n->as_Bool()->_test._test == BoolTest::gt);
5619 5662 format %{ "" %}
5620 5663 interface(COND_INTER) %{
5621 5664 equal(0x4, "e");
5622 5665 not_equal(0x5, "ne");
5623 5666 less(0x2, "b");
5624 5667 greater_equal(0x3, "nb");
5625 5668 less_equal(0x6, "be");
5626 5669 greater(0x7, "nbe");
5627 5670 %}
5628 5671 %}
5629 5672
5630 5673
5631 5674 // Floating comparisons that can be fixed up with extra conditional jumps
5632 5675 operand cmpOpUCF2() %{
5633 5676 match(Bool);
5634 5677 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5635 5678 n->as_Bool()->_test._test == BoolTest::eq);
5636 5679 format %{ "" %}
5637 5680 interface(COND_INTER) %{
5638 5681 equal(0x4, "e");
5639 5682 not_equal(0x5, "ne");
5640 5683 less(0x2, "b");
5641 5684 greater_equal(0x3, "nb");
5642 5685 less_equal(0x6, "be");
5643 5686 greater(0x7, "nbe");
5644 5687 %}
5645 5688 %}
5646 5689
5647 5690 // Comparison Code for FP conditional move
5648 5691 operand cmpOp_fcmov() %{
5649 5692 match(Bool);
5650 5693
5651 5694 format %{ "" %}
5652 5695 interface(COND_INTER) %{
5653 5696 equal (0x0C8);
5654 5697 not_equal (0x1C8);
5655 5698 less (0x0C0);
5656 5699 greater_equal(0x1C0);
5657 5700 less_equal (0x0D0);
5658 5701 greater (0x1D0);
5659 5702 %}
5660 5703 %}
5661 5704
// Comparison Code used in long compares
5663 5706 operand cmpOp_commute() %{
5664 5707 match(Bool);
5665 5708
5666 5709 format %{ "" %}
5667 5710 interface(COND_INTER) %{
5668 5711 equal(0x4, "e");
5669 5712 not_equal(0x5, "ne");
5670 5713 less(0xF, "g");
5671 5714 greater_equal(0xE, "le");
5672 5715 less_equal(0xD, "ge");
5673 5716 greater(0xC, "l");
5674 5717 %}
5675 5718 %}
5676 5719
5677 5720 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5679 5722 // instruction definitions by not requiring the AD writer to specify separate
5680 5723 // instructions for every form of operand when the instruction accepts
5681 5724 // multiple operand types with the same basic encoding and format. The classic
5682 5725 // case of this is memory operands.
5683 5726
5684 5727 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5685 5728 indIndex, indIndexScale, indIndexScaleOffset);
5686 5729
5687 5730 // Long memory operations are encoded in 2 instructions and a +4 offset.
5688 5731 // This means some kind of offset is always required and you cannot use
5689 5732 // an oop as the offset (done when working on static globals).
5690 5733 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5691 5734 indIndex, indIndexScale, indIndexScaleOffset);
5692 5735
5693 5736
5694 5737 //----------PIPELINE-----------------------------------------------------------
5695 5738 // Rules which define the behavior of the target architectures pipeline.
5696 5739 pipeline %{
5697 5740
5698 5741 //----------ATTRIBUTES---------------------------------------------------------
5699 5742 attributes %{
5700 5743 variable_size_instructions; // Fixed size instructions
5701 5744 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5702 5745 instruction_unit_size = 1; // An instruction is 1 bytes long
5703 5746 instruction_fetch_unit_size = 16; // The processor fetches one line
5704 5747 instruction_fetch_units = 1; // of 16 bytes
5705 5748
5706 5749 // List of nop instructions
5707 5750 nops( MachNop );
5708 5751 %}
5709 5752
5710 5753 //----------RESOURCES----------------------------------------------------------
5711 5754 // Resources are the functional units available to the machine
5712 5755
5713 5756 // Generic P2/P3 pipeline
5714 5757 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5715 5758 // 3 instructions decoded per cycle.
5716 5759 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5717 5760 // 2 ALU op, only ALU0 handles mul/div instructions.
5718 5761 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5719 5762 MS0, MS1, MEM = MS0 | MS1,
5720 5763 BR, FPU,
5721 5764 ALU0, ALU1, ALU = ALU0 | ALU1 );
5722 5765
5723 5766 //----------PIPELINE DESCRIPTION-----------------------------------------------
5724 5767 // Pipeline Description specifies the stages in the machine's pipeline
5725 5768
5726 5769 // Generic P2/P3 pipeline
5727 5770 pipe_desc(S0, S1, S2, S3, S4, S5);
5728 5771
5729 5772 //----------PIPELINE CLASSES---------------------------------------------------
5730 5773 // Pipeline Classes describe the stages in which input and output are
5731 5774 // referenced by the hardware pipeline.
5732 5775
5733 5776 // Naming convention: ialu or fpu
5734 5777 // Then: _reg
5735 5778 // Then: _reg if there is a 2nd register
5736 5779 // Then: _long if it's a pair of instructions implementing a long
5737 5780 // Then: _fat if it requires the big decoder
5738 5781 // Or: _mem if it requires the big decoder and a memory unit.
5739 5782
5740 5783 // Integer ALU reg operation
5741 5784 pipe_class ialu_reg(eRegI dst) %{
5742 5785 single_instruction;
5743 5786 dst : S4(write);
5744 5787 dst : S3(read);
5745 5788 DECODE : S0; // any decoder
5746 5789 ALU : S3; // any alu
5747 5790 %}
5748 5791
5749 5792 // Long ALU reg operation
5750 5793 pipe_class ialu_reg_long(eRegL dst) %{
5751 5794 instruction_count(2);
5752 5795 dst : S4(write);
5753 5796 dst : S3(read);
5754 5797 DECODE : S0(2); // any 2 decoders
5755 5798 ALU : S3(2); // both alus
5756 5799 %}
5757 5800
5758 5801 // Integer ALU reg operation using big decoder
5759 5802 pipe_class ialu_reg_fat(eRegI dst) %{
5760 5803 single_instruction;
5761 5804 dst : S4(write);
5762 5805 dst : S3(read);
5763 5806 D0 : S0; // big decoder only
5764 5807 ALU : S3; // any alu
5765 5808 %}
5766 5809
5767 5810 // Long ALU reg operation using big decoder
5768 5811 pipe_class ialu_reg_long_fat(eRegL dst) %{
5769 5812 instruction_count(2);
5770 5813 dst : S4(write);
5771 5814 dst : S3(read);
5772 5815 D0 : S0(2); // big decoder only; twice
5773 5816 ALU : S3(2); // any 2 alus
5774 5817 %}
5775 5818
5776 5819 // Integer ALU reg-reg operation
5777 5820 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5778 5821 single_instruction;
5779 5822 dst : S4(write);
5780 5823 src : S3(read);
5781 5824 DECODE : S0; // any decoder
5782 5825 ALU : S3; // any alu
5783 5826 %}
5784 5827
5785 5828 // Long ALU reg-reg operation
5786 5829 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5787 5830 instruction_count(2);
5788 5831 dst : S4(write);
5789 5832 src : S3(read);
5790 5833 DECODE : S0(2); // any 2 decoders
5791 5834 ALU : S3(2); // both alus
5792 5835 %}
5793 5836
5794 5837 // Integer ALU reg-reg operation
5795 5838 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5796 5839 single_instruction;
5797 5840 dst : S4(write);
5798 5841 src : S3(read);
5799 5842 D0 : S0; // big decoder only
5800 5843 ALU : S3; // any alu
5801 5844 %}
5802 5845
5803 5846 // Long ALU reg-reg operation
5804 5847 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5805 5848 instruction_count(2);
5806 5849 dst : S4(write);
5807 5850 src : S3(read);
5808 5851 D0 : S0(2); // big decoder only; twice
5809 5852 ALU : S3(2); // both alus
5810 5853 %}
5811 5854
5812 5855 // Integer ALU reg-mem operation
5813 5856 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5814 5857 single_instruction;
5815 5858 dst : S5(write);
5816 5859 mem : S3(read);
5817 5860 D0 : S0; // big decoder only
5818 5861 ALU : S4; // any alu
5819 5862 MEM : S3; // any mem
5820 5863 %}
5821 5864
5822 5865 // Long ALU reg-mem operation
5823 5866 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5824 5867 instruction_count(2);
5825 5868 dst : S5(write);
5826 5869 mem : S3(read);
5827 5870 D0 : S0(2); // big decoder only; twice
5828 5871 ALU : S4(2); // any 2 alus
5829 5872 MEM : S3(2); // both mems
5830 5873 %}
5831 5874
5832 5875 // Integer mem operation (prefetch)
5833 5876 pipe_class ialu_mem(memory mem)
5834 5877 %{
5835 5878 single_instruction;
5836 5879 mem : S3(read);
5837 5880 D0 : S0; // big decoder only
5838 5881 MEM : S3; // any mem
5839 5882 %}
5840 5883
5841 5884 // Integer Store to Memory
5842 5885 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5843 5886 single_instruction;
5844 5887 mem : S3(read);
5845 5888 src : S5(read);
5846 5889 D0 : S0; // big decoder only
5847 5890 ALU : S4; // any alu
5848 5891 MEM : S3;
5849 5892 %}
5850 5893
5851 5894 // Long Store to Memory
5852 5895 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5853 5896 instruction_count(2);
5854 5897 mem : S3(read);
5855 5898 src : S5(read);
5856 5899 D0 : S0(2); // big decoder only; twice
5857 5900 ALU : S4(2); // any 2 alus
5858 5901 MEM : S3(2); // Both mems
5859 5902 %}
5860 5903
5861 5904 // Integer Store to Memory
5862 5905 pipe_class ialu_mem_imm(memory mem) %{
5863 5906 single_instruction;
5864 5907 mem : S3(read);
5865 5908 D0 : S0; // big decoder only
5866 5909 ALU : S4; // any alu
5867 5910 MEM : S3;
5868 5911 %}
5869 5912
5870 5913 // Integer ALU0 reg-reg operation
5871 5914 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5872 5915 single_instruction;
5873 5916 dst : S4(write);
5874 5917 src : S3(read);
5875 5918 D0 : S0; // Big decoder only
5876 5919 ALU0 : S3; // only alu0
5877 5920 %}
5878 5921
5879 5922 // Integer ALU0 reg-mem operation
5880 5923 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
5881 5924 single_instruction;
5882 5925 dst : S5(write);
5883 5926 mem : S3(read);
5884 5927 D0 : S0; // big decoder only
5885 5928 ALU0 : S4; // ALU0 only
5886 5929 MEM : S3; // any mem
5887 5930 %}
5888 5931
5889 5932 // Integer ALU reg-reg operation
5890 5933 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
5891 5934 single_instruction;
5892 5935 cr : S4(write);
5893 5936 src1 : S3(read);
5894 5937 src2 : S3(read);
5895 5938 DECODE : S0; // any decoder
5896 5939 ALU : S3; // any alu
5897 5940 %}
5898 5941
5899 5942 // Integer ALU reg-imm operation
5900 5943 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
5901 5944 single_instruction;
5902 5945 cr : S4(write);
5903 5946 src1 : S3(read);
5904 5947 DECODE : S0; // any decoder
5905 5948 ALU : S3; // any alu
5906 5949 %}
5907 5950
5908 5951 // Integer ALU reg-mem operation
5909 5952 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
5910 5953 single_instruction;
5911 5954 cr : S4(write);
5912 5955 src1 : S3(read);
5913 5956 src2 : S3(read);
5914 5957 D0 : S0; // big decoder only
5915 5958 ALU : S4; // any alu
5916 5959 MEM : S3;
5917 5960 %}
5918 5961
5919 5962 // Conditional move reg-reg
5920 5963 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
5921 5964 instruction_count(4);
5922 5965 y : S4(read);
5923 5966 q : S3(read);
5924 5967 p : S3(read);
5925 5968 DECODE : S0(4); // any decoder
5926 5969 %}
5927 5970
5928 5971 // Conditional move reg-reg
5929 5972 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
5930 5973 single_instruction;
5931 5974 dst : S4(write);
5932 5975 src : S3(read);
5933 5976 cr : S3(read);
5934 5977 DECODE : S0; // any decoder
5935 5978 %}
5936 5979
5937 5980 // Conditional move reg-mem
5938 5981 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5939 5982 single_instruction;
5940 5983 dst : S4(write);
5941 5984 src : S3(read);
5942 5985 cr : S3(read);
5943 5986 DECODE : S0; // any decoder
5944 5987 MEM : S3;
5945 5988 %}
5946 5989
5947 5990 // Conditional move reg-reg long
5948 5991 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5949 5992 single_instruction;
5950 5993 dst : S4(write);
5951 5994 src : S3(read);
5952 5995 cr : S3(read);
5953 5996 DECODE : S0(2); // any 2 decoders
5954 5997 %}
5955 5998
5956 5999 // Conditional move double reg-reg
5957 6000 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
5958 6001 single_instruction;
5959 6002 dst : S4(write);
5960 6003 src : S3(read);
5961 6004 cr : S3(read);
5962 6005 DECODE : S0; // any decoder
5963 6006 %}
5964 6007
5965 6008 // Float reg-reg operation
5966 6009 pipe_class fpu_reg(regD dst) %{
5967 6010 instruction_count(2);
5968 6011 dst : S3(read);
5969 6012 DECODE : S0(2); // any 2 decoders
5970 6013 FPU : S3;
5971 6014 %}
5972 6015
5973 6016 // Float reg-reg operation
5974 6017 pipe_class fpu_reg_reg(regD dst, regD src) %{
5975 6018 instruction_count(2);
5976 6019 dst : S4(write);
5977 6020 src : S3(read);
5978 6021 DECODE : S0(2); // any 2 decoders
5979 6022 FPU : S3;
5980 6023 %}
5981 6024
5982 6025 // Float reg-reg operation
5983 6026 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
5984 6027 instruction_count(3);
5985 6028 dst : S4(write);
5986 6029 src1 : S3(read);
5987 6030 src2 : S3(read);
5988 6031 DECODE : S0(3); // any 3 decoders
5989 6032 FPU : S3(2);
5990 6033 %}
5991 6034
5992 6035 // Float reg-reg operation
5993 6036 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
5994 6037 instruction_count(4);
5995 6038 dst : S4(write);
5996 6039 src1 : S3(read);
5997 6040 src2 : S3(read);
5998 6041 src3 : S3(read);
5999 6042 DECODE : S0(4); // any 3 decoders
6000 6043 FPU : S3(2);
6001 6044 %}
6002 6045
6003 6046 // Float reg-reg operation
6004 6047 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6005 6048 instruction_count(4);
6006 6049 dst : S4(write);
6007 6050 src1 : S3(read);
6008 6051 src2 : S3(read);
6009 6052 src3 : S3(read);
6010 6053 DECODE : S1(3); // any 3 decoders
6011 6054 D0 : S0; // Big decoder only
6012 6055 FPU : S3(2);
6013 6056 MEM : S3;
6014 6057 %}
6015 6058
6016 6059 // Float reg-mem operation
6017 6060 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6018 6061 instruction_count(2);
6019 6062 dst : S5(write);
6020 6063 mem : S3(read);
6021 6064 D0 : S0; // big decoder only
6022 6065 DECODE : S1; // any decoder for FPU POP
6023 6066 FPU : S4;
6024 6067 MEM : S3; // any mem
6025 6068 %}
6026 6069
6027 6070 // Float reg-mem operation
6028 6071 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6029 6072 instruction_count(3);
6030 6073 dst : S5(write);
6031 6074 src1 : S3(read);
6032 6075 mem : S3(read);
6033 6076 D0 : S0; // big decoder only
6034 6077 DECODE : S1(2); // any decoder for FPU POP
6035 6078 FPU : S4;
6036 6079 MEM : S3; // any mem
6037 6080 %}
6038 6081
6039 6082 // Float mem-reg operation
6040 6083 pipe_class fpu_mem_reg(memory mem, regD src) %{
6041 6084 instruction_count(2);
6042 6085 src : S5(read);
6043 6086 mem : S3(read);
6044 6087 DECODE : S0; // any decoder for FPU PUSH
6045 6088 D0 : S1; // big decoder only
6046 6089 FPU : S4;
6047 6090 MEM : S3; // any mem
6048 6091 %}
6049 6092
6050 6093 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6051 6094 instruction_count(3);
6052 6095 src1 : S3(read);
6053 6096 src2 : S3(read);
6054 6097 mem : S3(read);
6055 6098 DECODE : S0(2); // any decoder for FPU PUSH
6056 6099 D0 : S1; // big decoder only
6057 6100 FPU : S4;
6058 6101 MEM : S3; // any mem
6059 6102 %}
6060 6103
6061 6104 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6062 6105 instruction_count(3);
6063 6106 src1 : S3(read);
6064 6107 src2 : S3(read);
6065 6108 mem : S4(read);
6066 6109 DECODE : S0; // any decoder for FPU PUSH
6067 6110 D0 : S0(2); // big decoder only
6068 6111 FPU : S4;
6069 6112 MEM : S3(2); // any mem
6070 6113 %}
6071 6114
6072 6115 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6073 6116 instruction_count(2);
6074 6117 src1 : S3(read);
6075 6118 dst : S4(read);
6076 6119 D0 : S0(2); // big decoder only
6077 6120 MEM : S3(2); // any mem
6078 6121 %}
6079 6122
6080 6123 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6081 6124 instruction_count(3);
6082 6125 src1 : S3(read);
6083 6126 src2 : S3(read);
6084 6127 dst : S4(read);
6085 6128 D0 : S0(3); // big decoder only
6086 6129 FPU : S4;
6087 6130 MEM : S3(3); // any mem
6088 6131 %}
6089 6132
6090 6133 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6091 6134 instruction_count(3);
6092 6135 src1 : S4(read);
6093 6136 mem : S4(read);
6094 6137 DECODE : S0; // any decoder for FPU PUSH
6095 6138 D0 : S0(2); // big decoder only
6096 6139 FPU : S4;
6097 6140 MEM : S3(2); // any mem
6098 6141 %}
6099 6142
6100 6143 // Float load constant
6101 6144 pipe_class fpu_reg_con(regD dst) %{
6102 6145 instruction_count(2);
6103 6146 dst : S5(write);
6104 6147 D0 : S0; // big decoder only for the load
6105 6148 DECODE : S1; // any decoder for FPU POP
6106 6149 FPU : S4;
6107 6150 MEM : S3; // any mem
6108 6151 %}
6109 6152
6110 6153 // Float load constant
6111 6154 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6112 6155 instruction_count(3);
6113 6156 dst : S5(write);
6114 6157 src : S3(read);
6115 6158 D0 : S0; // big decoder only for the load
6116 6159 DECODE : S1(2); // any decoder for FPU POP
6117 6160 FPU : S4;
6118 6161 MEM : S3; // any mem
6119 6162 %}
6120 6163
6121 6164 // UnConditional branch
6122 6165 pipe_class pipe_jmp( label labl ) %{
6123 6166 single_instruction;
6124 6167 BR : S3;
6125 6168 %}
6126 6169
6127 6170 // Conditional branch
6128 6171 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6129 6172 single_instruction;
6130 6173 cr : S1(read);
6131 6174 BR : S3;
6132 6175 %}
6133 6176
6134 6177 // Allocation idiom
6135 6178 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6136 6179 instruction_count(1); force_serialization;
6137 6180 fixed_latency(6);
6138 6181 heap_ptr : S3(read);
6139 6182 DECODE : S0(3);
6140 6183 D0 : S2;
6141 6184 MEM : S3;
6142 6185 ALU : S3(2);
6143 6186 dst : S5(write);
6144 6187 BR : S5;
6145 6188 %}
6146 6189
6147 6190 // Generic big/slow expanded idiom
6148 6191 pipe_class pipe_slow( ) %{
6149 6192 instruction_count(10); multiple_bundles; force_serialization;
6150 6193 fixed_latency(100);
6151 6194 D0 : S0(2);
6152 6195 MEM : S3(2);
6153 6196 %}
6154 6197
6155 6198 // The real do-nothing guy
6156 6199 pipe_class empty( ) %{
6157 6200 instruction_count(0);
6158 6201 %}
6159 6202
6160 6203 // Define the class for the Nop node
6161 6204 define %{
6162 6205 MachNop = empty;
6163 6206 %}
6164 6207
6165 6208 %}
6166 6209
6167 6210 //----------INSTRUCTIONS-------------------------------------------------------
6168 6211 //
6169 6212 // match -- States which machine-independent subtree may be replaced
6170 6213 // by this instruction.
6171 6214 // ins_cost -- The estimated cost of this instruction is used by instruction
6172 6215 // selection to identify a minimum cost tree of machine
6173 6216 // instructions that matches a tree of machine-independent
6174 6217 // instructions.
6175 6218 // format -- A string providing the disassembly for this instruction.
6176 6219 // The value of an instruction's operand may be inserted
6177 6220 // by referring to it with a '$' prefix.
6178 6221 // opcode -- Three instruction opcodes may be provided. These are referred
6179 6222 // to within an encode class as $primary, $secondary, and $tertiary
6180 6223 // respectively. The primary opcode is commonly used to
6181 6224 // indicate the type of machine instruction, while secondary
6182 6225 // and tertiary are often used for prefix options or addressing
6183 6226 // modes.
6184 6227 // ins_encode -- A list of encode classes with parameters. The encode class
6185 6228 // name must have been defined in an 'enc_class' specification
6186 6229 // in the encode section of the architecture description.
6187 6230
6188 6231 //----------BSWAP-Instruction--------------------------------------------------
6189 6232 instruct bytes_reverse_int(eRegI dst) %{
6190 6233 match(Set dst (ReverseBytesI dst));
6191 6234
6192 6235 format %{ "BSWAP $dst" %}
6193 6236 opcode(0x0F, 0xC8);
6194 6237 ins_encode( OpcP, OpcSReg(dst) );
6195 6238 ins_pipe( ialu_reg );
6196 6239 %}
6197 6240
6198 6241 instruct bytes_reverse_long(eRegL dst) %{
6199 6242 match(Set dst (ReverseBytesL dst));
6200 6243
6201 6244 format %{ "BSWAP $dst.lo\n\t"
6202 6245 "BSWAP $dst.hi\n\t"
6203 6246 "XCHG $dst.lo $dst.hi" %}
6204 6247
6205 6248 ins_cost(125);
6206 6249 ins_encode( bswap_long_bytes(dst) );
6207 6250 ins_pipe( ialu_reg_reg);
6208 6251 %}
6209 6252
6210 6253
6211 6254 //---------- Zeros Count Instructions ------------------------------------------
6212 6255
6213 6256 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6214 6257 predicate(UseCountLeadingZerosInstruction);
6215 6258 match(Set dst (CountLeadingZerosI src));
6216 6259 effect(KILL cr);
6217 6260
6218 6261 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6219 6262 ins_encode %{
6220 6263 __ lzcntl($dst$$Register, $src$$Register);
6221 6264 %}
6222 6265 ins_pipe(ialu_reg);
6223 6266 %}
6224 6267
6225 6268 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6226 6269 predicate(!UseCountLeadingZerosInstruction);
6227 6270 match(Set dst (CountLeadingZerosI src));
6228 6271 effect(KILL cr);
6229 6272
6230 6273 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6231 6274 "JNZ skip\n\t"
6232 6275 "MOV $dst, -1\n"
6233 6276 "skip:\n\t"
6234 6277 "NEG $dst\n\t"
6235 6278 "ADD $dst, 31" %}
6236 6279 ins_encode %{
6237 6280 Register Rdst = $dst$$Register;
6238 6281 Register Rsrc = $src$$Register;
6239 6282 Label skip;
6240 6283 __ bsrl(Rdst, Rsrc);
6241 6284 __ jccb(Assembler::notZero, skip);
6242 6285 __ movl(Rdst, -1);
6243 6286 __ bind(skip);
6244 6287 __ negl(Rdst);
6245 6288 __ addl(Rdst, BitsPerInt - 1);
6246 6289 %}
6247 6290 ins_pipe(ialu_reg);
6248 6291 %}
6249 6292
6250 6293 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6251 6294 predicate(UseCountLeadingZerosInstruction);
6252 6295 match(Set dst (CountLeadingZerosL src));
6253 6296 effect(TEMP dst, KILL cr);
6254 6297
6255 6298 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6256 6299 "JNC done\n\t"
6257 6300 "LZCNT $dst, $src.lo\n\t"
6258 6301 "ADD $dst, 32\n"
6259 6302 "done:" %}
6260 6303 ins_encode %{
6261 6304 Register Rdst = $dst$$Register;
6262 6305 Register Rsrc = $src$$Register;
6263 6306 Label done;
6264 6307 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6265 6308 __ jccb(Assembler::carryClear, done);
6266 6309 __ lzcntl(Rdst, Rsrc);
6267 6310 __ addl(Rdst, BitsPerInt);
6268 6311 __ bind(done);
6269 6312 %}
6270 6313 ins_pipe(ialu_reg);
6271 6314 %}
6272 6315
6273 6316 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6274 6317 predicate(!UseCountLeadingZerosInstruction);
6275 6318 match(Set dst (CountLeadingZerosL src));
6276 6319 effect(TEMP dst, KILL cr);
6277 6320
6278 6321 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6279 6322 "JZ msw_is_zero\n\t"
6280 6323 "ADD $dst, 32\n\t"
6281 6324 "JMP not_zero\n"
6282 6325 "msw_is_zero:\n\t"
6283 6326 "BSR $dst, $src.lo\n\t"
6284 6327 "JNZ not_zero\n\t"
6285 6328 "MOV $dst, -1\n"
6286 6329 "not_zero:\n\t"
6287 6330 "NEG $dst\n\t"
6288 6331 "ADD $dst, 63\n" %}
6289 6332 ins_encode %{
6290 6333 Register Rdst = $dst$$Register;
6291 6334 Register Rsrc = $src$$Register;
6292 6335 Label msw_is_zero;
6293 6336 Label not_zero;
6294 6337 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6295 6338 __ jccb(Assembler::zero, msw_is_zero);
6296 6339 __ addl(Rdst, BitsPerInt);
6297 6340 __ jmpb(not_zero);
6298 6341 __ bind(msw_is_zero);
6299 6342 __ bsrl(Rdst, Rsrc);
6300 6343 __ jccb(Assembler::notZero, not_zero);
6301 6344 __ movl(Rdst, -1);
6302 6345 __ bind(not_zero);
6303 6346 __ negl(Rdst);
6304 6347 __ addl(Rdst, BitsPerLong - 1);
6305 6348 %}
6306 6349 ins_pipe(ialu_reg);
6307 6350 %}
6308 6351
6309 6352 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6310 6353 match(Set dst (CountTrailingZerosI src));
6311 6354 effect(KILL cr);
6312 6355
6313 6356 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6314 6357 "JNZ done\n\t"
6315 6358 "MOV $dst, 32\n"
6316 6359 "done:" %}
6317 6360 ins_encode %{
6318 6361 Register Rdst = $dst$$Register;
6319 6362 Label done;
6320 6363 __ bsfl(Rdst, $src$$Register);
6321 6364 __ jccb(Assembler::notZero, done);
6322 6365 __ movl(Rdst, BitsPerInt);
6323 6366 __ bind(done);
6324 6367 %}
6325 6368 ins_pipe(ialu_reg);
6326 6369 %}
6327 6370
6328 6371 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6329 6372 match(Set dst (CountTrailingZerosL src));
6330 6373 effect(TEMP dst, KILL cr);
6331 6374
6332 6375 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6333 6376 "JNZ done\n\t"
6334 6377 "BSF $dst, $src.hi\n\t"
6335 6378 "JNZ msw_not_zero\n\t"
6336 6379 "MOV $dst, 32\n"
6337 6380 "msw_not_zero:\n\t"
6338 6381 "ADD $dst, 32\n"
6339 6382 "done:" %}
6340 6383 ins_encode %{
6341 6384 Register Rdst = $dst$$Register;
6342 6385 Register Rsrc = $src$$Register;
6343 6386 Label msw_not_zero;
6344 6387 Label done;
6345 6388 __ bsfl(Rdst, Rsrc);
6346 6389 __ jccb(Assembler::notZero, done);
6347 6390 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6348 6391 __ jccb(Assembler::notZero, msw_not_zero);
6349 6392 __ movl(Rdst, BitsPerInt);
6350 6393 __ bind(msw_not_zero);
6351 6394 __ addl(Rdst, BitsPerInt);
6352 6395 __ bind(done);
6353 6396 %}
6354 6397 ins_pipe(ialu_reg);
6355 6398 %}
6356 6399
6357 6400
//---------- Population Count Instructions -------------------------------------

// Count the set bits of an int with the POPCNT instruction.
// Only matched when the VM flag UsePopCountInstruction is enabled.
instruct popCountI(eRegI dst, eRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// PopCountI with a memory source: folds the load into POPCNT.
instruct popCountI_mem(eRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// On ia32 a long occupies a register pair, so pop-count each 32-bit half
// and add the partial counts.  POPCNT writes the flags, hence KILL cr.
// TEMP dst ensures dst is not allocated on top of src, since dst is
// written before src.hi is read.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: pop-count the two 32-bit words of the long at $mem and
// $mem+4 (little-endian low/high words) and add them.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Same base/index/scale with disp bumped by 4 reaches the high word.
    // NOTE(review): last arg 'false' presumably marks the displacement as
    // not an oop/relocatable — confirm against Address::make_raw.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6417 6460
6418 6461
6419 6462 //----------Load/Store/Move Instructions---------------------------------------
6420 6463 //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
// Sign-extend byte into dst.lo, then fill dst.hi with the sign bit.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX8 the top 25 bits all equal the sign bit, so an
    // arithmetic shift by only 7 still leaves dst.hi all sign bits.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Zero-extend the byte into dst.lo; the high word of the long is zero.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
// The AND result is at most 0xFF (never negative), so zeroing dst.hi
// correctly implements the ConvI2L sign extension.
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6502 6545
// Load Short (16bit signed)
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// (x << 24) >> 24 of a loaded short is just a sign-extended byte load
// of the low byte (little-endian), so emit MOVSX8 directly.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX the top 17 bits all equal the sign bit, so shifting by
    // 15 leaves dst.hi filled with sign bits.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
6547 6590
// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// (x << 24) >> 24 keeps only the sign-extended low byte; emit MOVSX8.
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Zero-extend; the high word of the long is always zero.
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// ushort & 0xFF keeps only the low byte, so a single zero-extending
// byte load (little-endian low byte is at $mem) does the whole job.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
// The AND result is at most 0xFFFF (never negative), so zeroing dst.hi
// is a correct sign extension.
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6622 6665
// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// (x << 24) >> 24 of a loaded int is a sign-extended load of the low
// byte (little-endian).
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// x & 0xFF of a loaded int is a zero-extended load of the low byte.
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// (x << 16) >> 16 is a sign-extended load of the low 16-bit word.
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// x & 0xFFFF is a zero-extended load of the low 16-bit word.
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6684 6727
// Load Integer into Long Register
// ConvI2L: sign-extend the loaded int into the register pair.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // dst.hi becomes all sign bits.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// int & 0xFF is a zero-extended byte load; the result is non-negative,
// so zeroing dst.hi correctly implements the sign extension.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// int & 0xFFFF is a zero-extended 16-bit load; result is non-negative,
// so a zero high word is the correct sign extension.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}
6733 6776
// Load Integer with 32-bit mask into Long Register
// ConvI2L sign-extends its int input.  If the mask has bit 31 set the
// AndI result can be negative, so the high word must be a copy of the
// sign bit — unconditionally zeroing $dst.hi (as the old encoding did)
// produced a wrong, zero-extended long for such masks.  AND first, then
// sign-extend with MOV/SAR 31; for non-negative results SAR yields 0,
// matching the old fast path.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "AND $dst.lo,$mask\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
    __ movl(HIGH_FROM_LOW(Rdst), Rdst); // lo and hi are always different registers.
    __ sarl(HIGH_FROM_LOW(Rdst), 31);   // Fill dst.hi with the sign of the masked value.
  %}
  ins_pipe(ialu_reg_mem);
%}
6750 6793
// Load Unsigned Integer into Long Register
// LoadUI2L zero-extends, so the high word is simply cleared.
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
6767 6810
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
// Non-atomic path: two 32-bit MOVs from $mem and $mem+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Build the low- and high-word addresses explicitly (disp and disp+4).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit SSE2 move, bounced through a
// temporary XMM register to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into a GPR pair: one 64-bit XMM
// load, then split the halves out with MOVD/PSRLQ/MOVD.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
6825 6868
// Load Range
// Loads an array length; plain 32-bit MOV (opcode 0x8B = MOV r32,r/m32).
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6859 6902
// Load Double
// x87 path (UseSSE<=1): FLD from memory, FSTP into the allocated FPU reg.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) clears the upper half of the XMM register.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) variant: writes only the low 64 bits, leaving the
// upper half of the XMM register untouched.
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 path (UseSSE==0): FLD from memory, FSTP into the FPU register.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}
6917 6960
// Load Aligned Packed Byte to XMM register
// All packed loads below are 64-bit MOVQ loads into the low half of XMM.
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
6967 7010
// Load Effective Address
// Materialize an address expression into a pointer register with LEA
// (opcode 0x8D); one variant per supported addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7018 7061
// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers flags (KILL cr).
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a long constant as two 32-bit immediate moves (lo then hi).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD9, 0x00); /* D9 /0 */
  ins_encode(LdImmF(src), Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXF().
instruct loadConX(regX dst, immXF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXF0().
instruct loadConX0(regX dst, immXF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D ST,$src\n\t"
            "FSTP $dst" %}
  ins_encode(LdImmD(src), Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXD().
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$con]" %}
  ins_encode(load_conXD(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}
7129 7172
// Load Stack Slot
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot as two 32-bit MOVs.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  // NOTE(review): pipe class ialu_mem_long_reg reads store-like for a
  // load — confirm it is the intended pipeline description.
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7188 7231
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// No usable prefetch instruction (no SSE, no 3DNow!): empty encoding.
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCH (0F 0D /0).
// NOTE(review): && binds tighter than ||, so ReadPrefetchInstr==3
// selects this form regardless of 3DNow! support — confirm intended.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// No usable prefetch instruction for writes: empty encoding.
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCHW (0F 0D /1).
// NOTE(review): same precedence question as prefetchr above —
// AllocatePrefetchInstr==3 bypasses the 3DNow! check; confirm intended.
instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7299 7342
//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The 0x66 operand-size prefix ahead of MOV (0x89) makes it a 16-bit store.
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic 64-bit store as two 32-bit MOVs (low word, then high word
// at $mem+4); the atomic case is handled by the volatile store forms.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// ConvL2I keeps only the low 32 bits, so store src.lo directly.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7358 7401
7359 7402 // Volatile Store Long.  Must be atomic, so move it into
7360 7403 // the FP TOS and then do a 64-bit FIST.  Has to probe the
7361 7404 // target address before the store (for null-ptr checks)
7362 7405 // so the memory operand is used twice in the encoding.
// The CMP $mem,EAX (opcode 0x3B) performs the probe; its flag clobber is
// declared via KILL cr.
7363 7406 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7364 7407 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7365 7408 match(Set mem (StoreL mem src));
7366 7409 effect( KILL cr );
7367 7410 ins_cost(400);
7368 7411 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7369 7412 "FILD $src\n\t"
7370 7413 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7371 7414 opcode(0x3B);
7372 7415 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7373 7416 ins_pipe( fpu_reg_mem );
7374 7417 %}
7375 7418
// SSE2 variant for a spilled long (stack slot): a single MOVSD is an atomic
// 64-bit store; tmp is a scratch XMM register.
7376 7419 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7377 7420 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7378 7421 match(Set mem (StoreL mem src));
7379 7422 effect( TEMP tmp, KILL cr );
7380 7423 ins_cost(380);
7381 7424 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7382 7425 "MOVSD $tmp,$src\n\t"
7383 7426 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7384 7427 opcode(0x3B);
7385 7428 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
7386 7429 ins_pipe( pipe_slow );
7387 7430 %}
7388 7431
// SSE2 variant for a long held in a GP register pair: build the 64-bit value
// in an XMM register (two MOVDs + PUNPCKLDQ), then store it with one MOVSD.
// Slightly cheaper (360) than the stack-slot path since no spill is needed.
7389 7432 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7390 7433 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7391 7434 match(Set mem (StoreL mem src));
7392 7435 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7393 7436 ins_cost(360);
7394 7437 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7395 7438 "MOVD $tmp,$src.lo\n\t"
7396 7439 "MOVD $tmp2,$src.hi\n\t"
7397 7440 "PUNPCKLDQ $tmp,$tmp2\n\t"
7398 7441 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7399 7442 opcode(0x3B);
7400 7443 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
7401 7444 ins_pipe( pipe_slow );
7402 7445 %}
7403 7446
7404 7447 // Store Pointer; for storing unknown oops and raw pointers
7405 7448 instruct storeP(memory mem, anyRegP src) %{
7406 7449 match(Set mem (StoreP mem src));
7407 7450
7408 7451 ins_cost(125);
7409 7452 format %{ "MOV $mem,$src" %}
7410 7453 opcode(0x89);
7411 7454 ins_encode( OpcP, RegMem( src, mem ) );
7412 7455 ins_pipe( ialu_mem_reg );
7413 7456 %}
7414 7457
7415 7458 // Store Integer Immediate
7416 7459 instruct storeImmI(memory mem, immI src) %{
7417 7460 match(Set mem (StoreI mem src));
7418 7461
7419 7462 ins_cost(150);
7420 7463 format %{ "MOV $mem,$src" %}
7421 7464 opcode(0xC7); /* C7 /0 */
7422 7465 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7423 7466 ins_pipe( ialu_mem_imm );
7424 7467 %}
7425 7468
7426 7469 // Store Short/Char Immediate
7427 7470 instruct storeImmI16(memory mem, immI16 src) %{
7428 7471 predicate(UseStoreImmI16);
7429 7472 match(Set mem (StoreC mem src));
7430 7473
7431 7474 ins_cost(150);
7432 7475 format %{ "MOV16 $mem,$src" %}
7433 7476 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7434 7477 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
7435 7478 ins_pipe( ialu_mem_imm );
7436 7479 %}
7437 7480
7438 7481 // Store Pointer Immediate; null pointers or constant oops that do not
7439 7482 // need card-mark barriers.
7440 7483 instruct storeImmP(memory mem, immP src) %{
7441 7484 match(Set mem (StoreP mem src));
7442 7485
7443 7486 ins_cost(150);
7444 7487 format %{ "MOV $mem,$src" %}
7445 7488 opcode(0xC7); /* C7 /0 */
7446 7489 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7447 7490 ins_pipe( ialu_mem_imm );
7448 7491 %}
7449 7492
7450 7493 // Store Byte Immediate
7451 7494 instruct storeImmB(memory mem, immI8 src) %{
7452 7495 match(Set mem (StoreB mem src));
7453 7496
7454 7497 ins_cost(150);
7455 7498 format %{ "MOV8 $mem,$src" %}
7456 7499 opcode(0xC6); /* C6 /0 */
7457 7500 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7458 7501 ins_pipe( ialu_mem_imm );
7459 7502 %}
7460 7503
7461 7504 // Store Aligned Packed Byte XMM register to memory
7462 7505 instruct storeA8B(memory mem, regXD src) %{
7463 7506 predicate(UseSSE>=1);
7464 7507 match(Set mem (Store8B mem src));
7465 7508 ins_cost(145);
7466 7509 format %{ "MOVQ $mem,$src\t! packed8B" %}
7467 7510 ins_encode( movq_st(mem, src));
7468 7511 ins_pipe( pipe_slow );
7469 7512 %}
7470 7513
7471 7514 // Store Aligned Packed Char/Short XMM register to memory
7472 7515 instruct storeA4C(memory mem, regXD src) %{
7473 7516 predicate(UseSSE>=1);
7474 7517 match(Set mem (Store4C mem src));
7475 7518 ins_cost(145);
7476 7519 format %{ "MOVQ $mem,$src\t! packed4C" %}
7477 7520 ins_encode( movq_st(mem, src));
7478 7521 ins_pipe( pipe_slow );
7479 7522 %}
7480 7523
7481 7524 // Store Aligned Packed Integer XMM register to memory
7482 7525 instruct storeA2I(memory mem, regXD src) %{
7483 7526 predicate(UseSSE>=1);
7484 7527 match(Set mem (Store2I mem src));
7485 7528 ins_cost(145);
7486 7529 format %{ "MOVQ $mem,$src\t! packed2I" %}
7487 7530 ins_encode( movq_st(mem, src));
7488 7531 ins_pipe( pipe_slow );
7489 7532 %}
7490 7533
7491 7534 // Store CMS card-mark Immediate
7492 7535 instruct storeImmCM(memory mem, immI8 src) %{
7493 7536 match(Set mem (StoreCM mem src));
7494 7537
7495 7538 ins_cost(150);
7496 7539 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7497 7540 opcode(0xC6); /* C6 /0 */
7498 7541 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7499 7542 ins_pipe( ialu_mem_imm );
7500 7543 %}
7501 7544
7502 7545 // Store Double
// x87 path (UseSSE<=1): value comes in on the FPU top-of-stack (regDPR1).
7503 7546 instruct storeD( memory mem, regDPR1 src) %{
7504 7547 predicate(UseSSE<=1);
7505 7548 match(Set mem (StoreD mem src));
7506 7549
7507 7550 ins_cost(100);
7508 7551 format %{ "FST_D $mem,$src" %}
7509 7552 opcode(0xDD); /* DD /2 */
7510 7553 ins_encode( enc_FP_store(mem,src) );
7511 7554 ins_pipe( fpu_mem_reg );
7512 7555 %}
7513 7556
7514 7557 // Store double does rounding on x86
// The x87 FST itself rounds to 64-bit, so the RoundDouble node folds away.
7515 7558 instruct storeD_rounded( memory mem, regDPR1 src) %{
7516 7559 predicate(UseSSE<=1);
7517 7560 match(Set mem (StoreD mem (RoundDouble src)));
7518 7561
7519 7562 ins_cost(100);
7520 7563 format %{ "FST_D $mem,$src\t# round" %}
7521 7564 opcode(0xDD); /* DD /2 */
7522 7565 ins_encode( enc_FP_store(mem,src) );
7523 7566 ins_pipe( fpu_mem_reg );
7524 7567 %}
7525 7568
7526 7569 // Store XMM register to memory (double-precision floating points)
7527 7570 // MOVSD instruction
7528 7571 instruct storeXD(memory mem, regXD src) %{
7529 7572 predicate(UseSSE>=2);
7530 7573 match(Set mem (StoreD mem src));
7531 7574 ins_cost(95);
7532 7575 format %{ "MOVSD $mem,$src" %}
7533 7576 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7534 7577 ins_pipe( pipe_slow );
7535 7578 %}
7536 7579
7537 7580 // Store XMM register to memory (single-precision floating point)
7538 7581 // MOVSS instruction
7539 7582 instruct storeX(memory mem, regX src) %{
7540 7583 predicate(UseSSE>=1);
7541 7584 match(Set mem (StoreF mem src));
7542 7585 ins_cost(95);
7543 7586 format %{ "MOVSS $mem,$src" %}
7544 7587 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7545 7588 ins_pipe( pipe_slow );
7546 7589 %}
7547 7590
7548 7591 // Store Aligned Packed Single Float XMM register to memory
7549 7592 instruct storeA2F(memory mem, regXD src) %{
7550 7593 predicate(UseSSE>=1);
7551 7594 match(Set mem (Store2F mem src));
7552 7595 ins_cost(145);
7553 7596 format %{ "MOVQ $mem,$src\t! packed2F" %}
7554 7597 ins_encode( movq_st(mem, src));
7555 7598 ins_pipe( pipe_slow );
7556 7599 %}
7557 7600
7558 7601 // Store Float
// x87 path (UseSSE==0): float on FPU top-of-stack (regFPR1), D9 /2 = FST m32.
7559 7602 instruct storeF( memory mem, regFPR1 src) %{
7560 7603 predicate(UseSSE==0);
7561 7604 match(Set mem (StoreF mem src));
7562 7605
7563 7606 ins_cost(100);
7564 7607 format %{ "FST_S $mem,$src" %}
7565 7608 opcode(0xD9); /* D9 /2 */
7566 7609 ins_encode( enc_FP_store(mem,src) );
7567 7610 ins_pipe( fpu_mem_reg );
7568 7611 %}
7569 7612
7570 7613 // Store Float does rounding on x86
// The 32-bit FST rounds to single precision, so RoundFloat folds away.
7571 7614 instruct storeF_rounded( memory mem, regFPR1 src) %{
7572 7615 predicate(UseSSE==0);
7573 7616 match(Set mem (StoreF mem (RoundFloat src)));
7574 7617
7575 7618 ins_cost(100);
7576 7619 format %{ "FST_S $mem,$src\t# round" %}
7577 7620 opcode(0xD9); /* D9 /2 */
7578 7621 ins_encode( enc_FP_store(mem,src) );
7579 7622 ins_pipe( fpu_mem_reg );
7580 7623 %}
7581 7624
7582 7625 // Store Float does rounding on x86
// ConvD2F folded into the store: the 32-bit FST narrows the double for free.
7583 7626 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7584 7627 predicate(UseSSE<=1);
7585 7628 match(Set mem (StoreF mem (ConvD2F src)));
7586 7629
7587 7630 ins_cost(100);
7588 7631 format %{ "FST_S $mem,$src\t# D-round" %}
7589 7632 opcode(0xD9); /* D9 /2 */
7590 7633 ins_encode( enc_FP_store(mem,src) );
7591 7634 ins_pipe( fpu_mem_reg );
7592 7635 %}
7593 7636
7594 7637 // Store immediate Float value (it is faster than store from FPU register)
7595 7638 // The instruction usage is guarded by predicate in operand immF().
// Stores the raw IEEE bits of the constant with an integer MOV (C7 /0).
7596 7639 instruct storeF_imm( memory mem, immF src) %{
7597 7640 match(Set mem (StoreF mem src));
7598 7641
7599 7642 ins_cost(50);
7600 7643 format %{ "MOV $mem,$src\t# store float" %}
7601 7644 opcode(0xC7); /* C7 /0 */
7602 7645 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7603 7646 ins_pipe( ialu_mem_imm );
7604 7647 %}
7605 7648
7606 7649 // Store immediate Float value (it is faster than store from XMM register)
7607 7650 // The instruction usage is guarded by predicate in operand immXF().
7608 7651 instruct storeX_imm( memory mem, immXF src) %{
7609 7652 match(Set mem (StoreF mem src));
7610 7653
7611 7654 ins_cost(50);
7612 7655 format %{ "MOV $mem,$src\t# store float" %}
7613 7656 opcode(0xC7); /* C7 /0 */
7614 7657 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7615 7658 ins_pipe( ialu_mem_imm );
7616 7659 %}
7617 7660
// Register-to-stack-slot spills. These match a bare "Set dst src" (register
// copy to a stack slot) rather than an ideal Store node.
7618 7661 // Store Integer to stack slot
7619 7662 instruct storeSSI(stackSlotI dst, eRegI src) %{
7620 7663 match(Set dst src);
7621 7664
7622 7665 ins_cost(100);
7623 7666 format %{ "MOV $dst,$src" %}
7624 7667 opcode(0x89);
7625 7668 ins_encode( OpcPRegSS( dst, src ) );
7626 7669 ins_pipe( ialu_mem_reg );
7627 7670 %}
7628 7671
7629 7672 // Store Integer to stack slot
7630 7673 instruct storeSSP(stackSlotP dst, eRegP src) %{
7631 7674 match(Set dst src);
7632 7675
7633 7676 ins_cost(100);
7634 7677 format %{ "MOV $dst,$src" %}
7635 7678 opcode(0x89);
7636 7679 ins_encode( OpcPRegSS( dst, src ) );
7637 7680 ins_pipe( ialu_mem_reg );
7638 7681 %}
7639 7682
7640 7683 // Store Long to stack slot
// Long spill needs two MOVs: low word, then high word at dst+4.
7641 7684 instruct storeSSL(stackSlotL dst, eRegL src) %{
7642 7685 match(Set dst src);
7643 7686
7644 7687 ins_cost(200);
7645 7688 format %{ "MOV $dst,$src.lo\n\t"
7646 7689 "MOV $dst+4,$src.hi" %}
7647 7690 opcode(0x89, 0x89);
7648 7691 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7649 7692 ins_pipe( ialu_mem_long_reg );
7650 7693 %}
7651 7694
7652 7695 //----------MemBar Instructions-----------------------------------------------
7653 7696 // Memory barrier flavors
// Acquire/release barriers emit no code (size 0); they exist to constrain the
// compiler's instruction scheduling.  NOTE(review): presumably the hardware
// side is covered by x86's ordering guarantees — only MemBarVolatile needs a
// real fence (the LOCK ADDL / StoreLoad below).
7654 7697
7655 7698 instruct membar_acquire() %{
7656 7699 match(MemBarAcquire);
7657 7700 ins_cost(400);
7658 7701
7659 7702 size(0);
7660 7703 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7661 7704 ins_encode();
7662 7705 ins_pipe(empty);
7663 7706 %}
7664 7707
// Zero-cost variant chosen when the acquire immediately follows a FastLock's
// CMPXCHG, which already has the required semantics.
7665 7708 instruct membar_acquire_lock() %{
7666 7709 match(MemBarAcquire);
7667 7710 predicate(Matcher::prior_fast_lock(n));
7668 7711 ins_cost(0);
7669 7712
7670 7713 size(0);
7671 7714 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7672 7715 ins_encode( );
7673 7716 ins_pipe(empty);
7674 7717 %}
7675 7718
7676 7719 instruct membar_release() %{
7677 7720 match(MemBarRelease);
7678 7721 ins_cost(400);
7679 7722
7680 7723 size(0);
7681 7724 format %{ "MEMBAR-release ! (empty encoding)" %}
7682 7725 ins_encode( );
7683 7726 ins_pipe(empty);
7684 7727 %}
7685 7728
// Zero-cost variant when a FastUnlock immediately follows the release.
7686 7729 instruct membar_release_lock() %{
7687 7730 match(MemBarRelease);
7688 7731 predicate(Matcher::post_fast_unlock(n));
7689 7732 ins_cost(0);
7690 7733
7691 7734 size(0);
7692 7735 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7693 7736 ins_encode( );
7694 7737 ins_pipe(empty);
7695 7738 %}
7696 7739
// Full StoreLoad barrier.  On MP the assembler emits LOCK ADDL [ESP],0
// (see format template); on UP it is empty.  KILL cr because ADDL sets flags.
7697 7740 instruct membar_volatile(eFlagsReg cr) %{
7698 7741 match(MemBarVolatile);
7699 7742 effect(KILL cr);
7700 7743 ins_cost(400);
7701 7744
7702 7745 format %{
7703 7746 $$template
7704 7747 if (os::is_MP()) {
7705 7748 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7706 7749 } else {
7707 7750 $$emit$$"MEMBAR-volatile ! (empty encoding)"
7708 7751 }
7709 7752 %}
7710 7753 ins_encode %{
7711 7754 __ membar(Assembler::StoreLoad);
7712 7755 %}
7713 7756 ins_pipe(pipe_slow);
7714 7757 %}
7715 7758
// Elided when the matcher proves a prior instruction already provides the
// StoreLoad barrier (e.g. a locked instruction).
7716 7759 instruct unnecessary_membar_volatile() %{
7717 7760 match(MemBarVolatile);
7718 7761 predicate(Matcher::post_store_load_barrier(n));
7719 7762 ins_cost(0);
7720 7763
7721 7764 size(0);
7722 7765 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7723 7766 ins_encode( );
7724 7767 ins_pipe(empty);
7725 7768 %}
7726 7769
7727 7770 //----------Move Instructions--------------------------------------------------
// CastX2P pins src and dst to the same register (EAX), so no code is emitted.
7728 7771 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7729 7772 match(Set dst (CastX2P src));
7730 7773 format %{ "# X2P $dst, $src" %}
7731 7774 ins_encode( /*empty encoding*/ );
7732 7775 ins_cost(0);
7733 7776 ins_pipe(empty);
7734 7777 %}
7735 7778
// CastP2X may land in a different register, so it is a plain register copy.
7736 7779 instruct castP2X(eRegI dst, eRegP src ) %{
7737 7780 match(Set dst (CastP2X src));
7738 7781 ins_cost(50);
7739 7782 format %{ "MOV $dst, $src\t# CastP2X" %}
7740 7783 ins_encode( enc_Copy( dst, src) );
7741 7784 ins_pipe( ialu_reg_reg );
7742 7785 %}
7743 7786
7744 7787 //----------Conditional Move---------------------------------------------------
7745 7788 // Conditional move
// CMOVcc is 0F 40+cc; enc_cmov presumably folds the condition code into the
// secondary opcode byte.  All CMOV forms require supports_cmov() (P6+).
7746 7789 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7747 7790 predicate(VM_Version::supports_cmov() );
7748 7791 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7749 7792 ins_cost(200);
7750 7793 format %{ "CMOV$cop $dst,$src" %}
7751 7794 opcode(0x0F,0x40);
7752 7795 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7753 7796 ins_pipe( pipe_cmov_reg );
7754 7797 %}
7755 7798
// Unsigned-flags variant (eFlagsRegU / cmpOpU); encoding is identical.
7756 7799 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7757 7800 predicate(VM_Version::supports_cmov() );
7758 7801 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7759 7802 ins_cost(200);
7760 7803 format %{ "CMOV$cop $dst,$src" %}
7761 7804 opcode(0x0F,0x40);
7762 7805 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7763 7806 ins_pipe( pipe_cmov_reg );
7764 7807 %}
7765 7808
// UCF (unordered-compare-flags) variants just expand to the U form.
7766 7809 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7767 7810 predicate(VM_Version::supports_cmov() );
7768 7811 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7769 7812 ins_cost(200);
7770 7813 expand %{
7771 7814 cmovI_regU(cop, cr, dst, src);
7772 7815 %}
7773 7816 %}
7774 7817
7775 7818 // Conditional move
7776 7819 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7777 7820 predicate(VM_Version::supports_cmov() );
7778 7821 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7779 7822 ins_cost(250);
7780 7823 format %{ "CMOV$cop $dst,$src" %}
7781 7824 opcode(0x0F,0x40);
7782 7825 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7783 7826 ins_pipe( pipe_cmov_mem );
7784 7827 %}
7785 7828
7786 7829 // Conditional move
7787 7830 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7788 7831 predicate(VM_Version::supports_cmov() );
7789 7832 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7790 7833 ins_cost(250);
7791 7834 format %{ "CMOV$cop $dst,$src" %}
7792 7835 opcode(0x0F,0x40);
7793 7836 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7794 7837 ins_pipe( pipe_cmov_mem );
7795 7838 %}
7796 7839
7797 7840 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7798 7841 predicate(VM_Version::supports_cmov() );
7799 7842 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7800 7843 ins_cost(250);
7801 7844 expand %{
7802 7845 cmovI_memU(cop, cr, dst, src);
7803 7846 %}
7804 7847 %}
7805 7848
7806 7849 // Conditional move
7807 7850 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7808 7851 predicate(VM_Version::supports_cmov() );
7809 7852 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7810 7853 ins_cost(200);
7811 7854 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7812 7855 opcode(0x0F,0x40);
7813 7856 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7814 7857 ins_pipe( pipe_cmov_reg );
7815 7858 %}
7816 7859
7817 7860 // Conditional move (non-P6 version)
7818 7861 // Note:  a CMoveP is generated for  stubs and native wrappers
7819 7862 //        regardless of whether we are on a P6, so we
7820 7863 //        emulate a cmov here
// Emulation: an inverted-sense short branch over a plain MOV (0x8b).
7821 7864 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7822 7865 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7823 7866 ins_cost(300);
7824 7867 format %{ "Jn$cop skip\n\t"
7825 7868 "MOV $dst,$src\t# pointer\n"
7826 7869 "skip:" %}
7827 7870 opcode(0x8b);
7828 7871 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7829 7872 ins_pipe( pipe_cmov_reg );
7830 7873 %}
7831 7874
7832 7875 // Conditional move
7833 7876 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7834 7877 predicate(VM_Version::supports_cmov() );
7835 7878 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7836 7879 ins_cost(200);
7837 7880 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7838 7881 opcode(0x0F,0x40);
7839 7882 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7840 7883 ins_pipe( pipe_cmov_reg );
7841 7884 %}
7842 7885
7843 7886 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7844 7887 predicate(VM_Version::supports_cmov() );
7845 7888 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7846 7889 ins_cost(200);
7847 7890 expand %{
7848 7891 cmovP_regU(cop, cr, dst, src);
7849 7892 %}
7850 7893 %}
7851 7894
7852 7895 // DISABLED: Requires the ADLC to emit a bottom_type call that
7853 7896 // correctly meets the two pointer arguments; one is an incoming
7854 7897 // register but the other is a memory operand.  ALSO appears to
7855 7898 // be buggy with implicit null checks.
7856 7899 //
7857 7900 //// Conditional move
7858 7901 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
7859 7902 //  predicate(VM_Version::supports_cmov() );
7860 7903 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7861 7904 //  ins_cost(250);
7862 7905 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7863 7906 //  opcode(0x0F,0x40);
7864 7907 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7865 7908 //  ins_pipe( pipe_cmov_mem );
7866 7909 //%}
7867 7910 //
7868 7911 //// Conditional move
7869 7912 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7870 7913 //  predicate(VM_Version::supports_cmov() );
7871 7914 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7872 7915 //  ins_cost(250);
7873 7916 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7874 7917 //  opcode(0x0F,0x40);
7875 7918 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7876 7919 //  ins_pipe( pipe_cmov_mem );
7877 7920 //%}
7878 7921
7879 7922 // Conditional move
// x87 FCMOVcc (opcode base 0xDA) — only valid with unsigned flag conditions,
// hence the eFlagsRegU/cmpOp_fcmov operands; dst must be FPU TOS (regDPR1).
7880 7923 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
7881 7924 predicate(UseSSE<=1);
7882 7925 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7883 7926 ins_cost(200);
7884 7927 format %{ "FCMOV$cop $dst,$src\t# double" %}
7885 7928 opcode(0xDA);
7886 7929 ins_encode( enc_cmov_d(cop,src) );
7887 7930 ins_pipe( pipe_cmovD_reg );
7888 7931 %}
7889 7932
7890 7933 // Conditional move
7891 7934 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
7892 7935 predicate(UseSSE==0);
7893 7936 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7894 7937 ins_cost(200);
7895 7938 format %{ "FCMOV$cop $dst,$src\t# float" %}
7896 7939 opcode(0xDA);
7897 7940 ins_encode( enc_cmov_d(cop,src) );
7898 7941 ins_pipe( pipe_cmovD_reg );
7899 7942 %}
7900 7943
7901 7944 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-condition fallback: inverted-sense branch over an x87 push/store.
7902 7945 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7903 7946 predicate(UseSSE<=1);
7904 7947 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7905 7948 ins_cost(200);
7906 7949 format %{ "Jn$cop skip\n\t"
7907 7950 "MOV $dst,$src\t# double\n"
7908 7951 "skip:" %}
7909 7952 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7910 7953 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
7911 7954 ins_pipe( pipe_cmovD_reg );
7912 7955 %}
7913 7956
7914 7957 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7915 7958 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7916 7959 predicate(UseSSE==0);
7917 7960 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7918 7961 ins_cost(200);
7919 7962 format %{ "Jn$cop skip\n\t"
7920 7963 "MOV $dst,$src\t# float\n"
7921 7964 "skip:" %}
7922 7965 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7923 7966 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
7924 7967 ins_pipe( pipe_cmovD_reg );
7925 7968 %}
7926 7969
7927 7970 // No CMOVE with SSE/SSE2
// SSE has no conditional move, so emit a short branch over a MOVSS.  The
// condition sense is inverted (cmpcode^1) to branch *around* the move.
7928 7971 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
7929 7972 predicate (UseSSE>=1);
7930 7973 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7931 7974 ins_cost(200);
7932 7975 format %{ "Jn$cop skip\n\t"
7933 7976 "MOVSS $dst,$src\t# float\n"
7934 7977 "skip:" %}
7935 7978 ins_encode %{
7936 7979 Label skip;
7937 7980 // Invert sense of branch from sense of CMOV
7938 7981 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7939 7982 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7940 7983 __ bind(skip);
7941 7984 %}
7942 7985 ins_pipe( pipe_slow );
7943 7986 %}
7944 7987
7945 7988 // No CMOVE with SSE/SSE2
7946 7989 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
7947 7990 predicate (UseSSE>=2);
7948 7991 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7949 7992 ins_cost(200);
7950 7993 format %{ "Jn$cop skip\n\t"
7951 7994 "MOVSD $dst,$src\t# float\n"
7952 7995 "skip:" %}
7953 7996 ins_encode %{
7954 7997 Label skip;
7955 7998 // Invert sense of branch from sense of CMOV
7956 7999 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7957 8000 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7958 8001 __ bind(skip);
7959 8002 %}
7960 8003 ins_pipe( pipe_slow );
7961 8004 %}
7962 8005
7963 8006 // unsigned version
7964 8007 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
7965 8008 predicate (UseSSE>=1);
7966 8009 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7967 8010 ins_cost(200);
7968 8011 format %{ "Jn$cop skip\n\t"
7969 8012 "MOVSS $dst,$src\t# float\n"
7970 8013 "skip:" %}
7971 8014 ins_encode %{
7972 8015 Label skip;
7973 8016 // Invert sense of branch from sense of CMOV
7974 8017 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7975 8018 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7976 8019 __ bind(skip);
7977 8020 %}
7978 8021 ins_pipe( pipe_slow );
7979 8022 %}
7980 8023
7981 8024 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
7982 8025 predicate (UseSSE>=1);
7983 8026 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7984 8027 ins_cost(200);
7985 8028 expand %{
7986 8029 fcmovX_regU(cop, cr, dst, src);
7987 8030 %}
7988 8031 %}
7989 8032
7990 8033 // unsigned version
7991 8034 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
7992 8035 predicate (UseSSE>=2);
7993 8036 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7994 8037 ins_cost(200);
7995 8038 format %{ "Jn$cop skip\n\t"
7996 8039 "MOVSD $dst,$src\t# float\n"
7997 8040 "skip:" %}
7998 8041 ins_encode %{
7999 8042 Label skip;
8000 8043 // Invert sense of branch from sense of CMOV
8001 8044 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8002 8045 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8003 8046 __ bind(skip);
8004 8047 %}
8005 8048 ins_pipe( pipe_slow );
8006 8049 %}
8007 8050
8008 8051 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8009 8052 predicate (UseSSE>=2);
8010 8053 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8011 8054 ins_cost(200);
8012 8055 expand %{
8013 8056 fcmovXD_regU(cop, cr, dst, src);
8014 8057 %}
8015 8058 %}
8016 8059
// Long CMOV: the 64-bit value lives in a register pair, so two CMOVs are
// emitted (low then high halves) under the same condition.
8017 8060 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8018 8061 predicate(VM_Version::supports_cmov() );
8019 8062 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8020 8063 ins_cost(200);
8021 8064 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8022 8065 "CMOV$cop $dst.hi,$src.hi" %}
8023 8066 opcode(0x0F,0x40);
8024 8067 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8025 8068 ins_pipe( pipe_cmov_reg_long );
8026 8069 %}
8027 8070
8028 8071 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8029 8072 predicate(VM_Version::supports_cmov() );
8030 8073 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8031 8074 ins_cost(200);
8032 8075 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8033 8076 "CMOV$cop $dst.hi,$src.hi" %}
8034 8077 opcode(0x0F,0x40);
8035 8078 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8036 8079 ins_pipe( pipe_cmov_reg_long );
8037 8080 %}
8038 8081
8039 8082 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8040 8083 predicate(VM_Version::supports_cmov() );
8041 8084 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8042 8085 ins_cost(200);
8043 8086 expand %{
8044 8087 cmovL_regU(cop, cr, dst, src);
8045 8088 %}
8046 8089 %}
8047 8090
8048 8091 //----------Arithmetic Instructions--------------------------------------------
8049 8092 //----------Addition Instructions----------------------------------------------
8050 8093 // Integer Addition Instructions
8051 8094 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8052 8095 match(Set dst (AddI dst src));
8053 8096 effect(KILL cr);
8054 8097
8055 8098 size(2);
8056 8099 format %{ "ADD $dst,$src" %}
8057 8100 opcode(0x03);
8058 8101 ins_encode( OpcP, RegReg( dst, src) );
8059 8102 ins_pipe( ialu_reg_reg );
8060 8103 %}
8061 8104
// OpcSErm presumably picks the sign-extended 8-bit immediate form (0x83)
// when the constant fits, otherwise the full 32-bit form (0x81 /0).
8062 8105 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8063 8106 match(Set dst (AddI dst src));
8064 8107 effect(KILL cr);
8065 8108
8066 8109 format %{ "ADD $dst,$src" %}
8067 8110 opcode(0x81, 0x00); /* /0 id */
8068 8111 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8069 8112 ins_pipe( ialu_reg );
8070 8113 %}
8071 8114
// +1 becomes the one-byte INC (0x40+reg) when UseIncDec allows it.
8072 8115 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8073 8116 predicate(UseIncDec);
8074 8117 match(Set dst (AddI dst src));
8075 8118 effect(KILL cr);
8076 8119
8077 8120 size(1);
8078 8121 format %{ "INC $dst" %}
8079 8122 opcode(0x40); /*  */
8080 8123 ins_encode( Opc_plus( primary, dst ) );
8081 8124 ins_pipe( ialu_reg );
8082 8125 %}
8083 8126
// Three-operand add via LEA — no flags clobbered, so no KILL cr effect.
8084 8127 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8085 8128 match(Set dst (AddI src0 src1));
8086 8129 ins_cost(110);
8087 8130
8088 8131 format %{ "LEA $dst,[$src0 + $src1]" %}
8089 8132 opcode(0x8D); /* 0x8D /r */
8090 8133 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8091 8134 ins_pipe( ialu_reg_reg );
8092 8135 %}
8093 8136
8094 8137 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8095 8138 match(Set dst (AddP src0 src1));
8096 8139 ins_cost(110);
8097 8140
8098 8141 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8099 8142 opcode(0x8D); /* 0x8D /r */
8100 8143 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8101 8144 ins_pipe( ialu_reg_reg );
8102 8145 %}
8103 8146
// Adding -1 becomes the one-byte DEC (0x48+reg).
8104 8147 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8105 8148 predicate(UseIncDec);
8106 8149 match(Set dst (AddI dst src));
8107 8150 effect(KILL cr);
8108 8151
8109 8152 size(1);
8110 8153 format %{ "DEC $dst" %}
8111 8154 opcode(0x48); /* */
8112 8155 ins_encode( Opc_plus( primary, dst ) );
8113 8156 ins_pipe( ialu_reg );
8114 8157 %}
8115 8158
8116 8159 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8117 8160 match(Set dst (AddP dst src));
8118 8161 effect(KILL cr);
8119 8162
8120 8163 size(2);
8121 8164 format %{ "ADD $dst,$src" %}
8122 8165 opcode(0x03);
8123 8166 ins_encode( OpcP, RegReg( dst, src) );
8124 8167 ins_pipe( ialu_reg_reg );
8125 8168 %}
8126 8169
8127 8170 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8128 8171 match(Set dst (AddP dst src));
8129 8172 effect(KILL cr);
8130 8173
8131 8174 format %{ "ADD $dst,$src" %}
8132 8175 opcode(0x81,0x00); /* Opcode 81 /0 id */
8133 8176 // ins_encode( RegImm( dst, src) );
8134 8177 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8135 8178 ins_pipe( ialu_reg );
8136 8179 %}
8137 8180
// reg += mem: ADD r32, r/m32 (0x03 /r).
8138 8181 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8139 8182 match(Set dst (AddI dst (LoadI src)));
8140 8183 effect(KILL cr);
8141 8184
8142 8185 ins_cost(125);
8143 8186 format %{ "ADD $dst,$src" %}
8144 8187 opcode(0x03);
8145 8188 ins_encode( OpcP, RegMem( dst, src) );
8146 8189 ins_pipe( ialu_reg_mem );
8147 8190 %}
8148 8191
// mem += reg: read-modify-write ADD r/m32, r32 (0x01 /r); the Load/Store pair
// on the same address folds into one instruction.
8149 8192 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8150 8193 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8151 8194 effect(KILL cr);
8152 8195
8153 8196 ins_cost(150);
8154 8197 format %{ "ADD $dst,$src" %}
8155 8198 opcode(0x01); /* Opcode 01 /r */
8156 8199 ins_encode( OpcP, RegMem( src, dst ) );
8157 8200 ins_pipe( ialu_mem_reg );
8158 8201 %}
8159 8202
8160 8203 // Add Memory with Immediate
8161 8204 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8162 8205 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8163 8206 effect(KILL cr);
8164 8207
8165 8208 ins_cost(125);
8166 8209 format %{ "ADD $dst,$src" %}
8167 8210 opcode(0x81); /* Opcode 81 /0 id */
8168 8211 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8169 8212 ins_pipe( ialu_mem_imm );
8170 8213 %}
8171 8214
// mem += 1 / mem -= 1: FF /0 (INC) and FF /1 (DEC) memory forms.
8172 8215 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8173 8216 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8174 8217 effect(KILL cr);
8175 8218
8176 8219 ins_cost(125);
8177 8220 format %{ "INC $dst" %}
8178 8221 opcode(0xFF); /* Opcode FF /0 */
8179 8222 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8180 8223 ins_pipe( ialu_mem_imm );
8181 8224 %}
8182 8225
8183 8226 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8184 8227 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8185 8228 effect(KILL cr);
8186 8229
8187 8230 ins_cost(125);
8188 8231 format %{ "DEC $dst" %}
8189 8232 opcode(0xFF); /* Opcode FF /1 */
8190 8233 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8191 8234 ins_pipe( ialu_mem_imm );
8192 8235 %}
8193 8236
8194 8237
// CheckCastPP is a compiler-internal type refinement; it generates no code
// (size(0), empty encoding) -- the value stays in the same register.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: pointer-type cast, also a no-op at the machine level.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: integer range-narrowing cast, a zero-cost no-op.
instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
8218 8261
8219 8262
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);      // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// Pre-SSE2 path: the x87 FILD/FISTP pair gives an atomic 64-bit transfer.
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: atomic 64-bit load through an XMM temporary, result left
// in a stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path: atomic 64-bit load through an XMM temporary, result split
// into a GPR pair (dst.lo / dst.hi) via MOVD + PSRLQ.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
8266 8309
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r = CMPXCHG r/m32,r32; lock_prefix makes it atomic on MP.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);   // CMPXCHG overwrites EAX on failure
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);   // CMPXCHG8B overwrites EDX:EAX on failure
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();           // lock prefix only needed on multiprocessor
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
8311 8354
8312 8355 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8313 8356
// 64-bit CAS via LOCK CMPXCHG8B; oldval is pinned to EDX:EAX and newval to
// ECX:EBX by the register classes. res is materialized from ZF afterwards.
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);   // EDX:EAX receives the old memory value on failure
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via LOCK CMPXCHG; oldval pinned to EAX, newval to ECX.
instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Integer CAS; same encoding shape as the pointer form above.
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
8350 8393
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
// Subtract Register with Register.
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);      // SUB r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract Register with Immediate (81 /5, or sign-extended 83 /5 via OpcSErm).
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract Register with a Memory operand.
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract Register from Memory (read-modify-write form).
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches AddP of a negated integer (AddP dst (SubI 0 src)) as a plain SUB.
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: matches (SubI 0 dst) as a single NEG instruction.
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8419 8462
8420 8463
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);   // 0F AF /r = IMUL r32,r/m32 (two-byte opcode)
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand form: dst = src * imm (69 /r id, or 6B /r ib via OpcSE).
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Helper: load a 32-bit constant into EAX only (low half of EDX:EAX),
// used as the constant operand of the mulI_imm_high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);   // MOV EAX,imm32
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree to require that the long constant
// fits in a signed 32-bit value (between min_jint and max_jint).
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// General case: shift count in 32..63, so an extra SAR of EDX is needed.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply (both operands masked to 32 bits).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8574 8617
// Integer DIV with Register
// The expansion guards against the minint / -1 overflow case: IDIV traps on
// 0x80000000 / -1, so that case is special-cased to yield quotient minint,
// remainder 0 (EDX cleared) without executing IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);   // IDIV leaves the remainder in EDX
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No 64-bit divide instruction on x86_32; call out to the runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Single IDIV produces quotient in EAX and remainder in EDX; same
// minint / -1 guard as divI_eReg above.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// NOTE(review): unlike divI_eReg, this form has no explicit minint/-1 guard
// in its format -- presumably cdq_enc covers it; confirm against the
// cdq_enc definition earlier in the file.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);   // quotient in EAX is discarded

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// As with divL_eReg, 64-bit remainder goes through a runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8656 8699
// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable shift count must live in CL (hence the eCXRegI operand).
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, memory operand (read-modify-write)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipe class is ialu_mem_imm although this is a
  // register-only form -- looks copy-pasted from sarI_mem_imm; confirm
  // whether ialu_reg was intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8814 8857
8815 8858
//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);      // AND r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);      // OR r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an integer (CastP2X).
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8953 8996
// ROL/ROR
// The *_imm1/_imm8/_CL instructs below are match-less expand templates;
// the rolI_*/rorI_* rules that follow match the OrI-of-two-shifts rotate
// idiom and expand into them.
// ROL expand
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL; dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 (a true rotate).
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// Same rotate with the complementary count written as (32 - shift).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate right: count in CL; dst must avoid ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}
9058 9101
9059 9102 // ROR 32bit by immI8 once
// Mirror of rolI_eReg_i8: recognize (x >>> R) | (x << L) as rotate-right
// only when R + L == 32 (checked modulo 32 via the & 0x1f predicate).
9060 9103 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9061 9104   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9062 9105   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9063 9106 
9064 9107   expand %{
9065 9108     rorI_eReg_imm8(dst, rshift, cr);
9066 9109   %}
9067 9110 %}
9068 9111
9069 9112 // ROR 32bit var by var once
9070 9113 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9071 9114 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9072 9115
9073 9116 expand %{
9074 9117 rorI_eReg_CL(dst, shift, cr);
9075 9118 %}
9076 9119 %}
9077 9120
9078 9121 // ROR 32bit var by var once
9079 9122 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9080 9123 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9081 9124
9082 9125 expand %{
9083 9126 rorI_eReg_CL(dst, shift, cr);
9084 9127 %}
9085 9128 %}
9086 9129
9087 9130 // Xor Instructions
9088 9131 // Xor Register with Register
9089 9132 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9090 9133 match(Set dst (XorI dst src));
9091 9134 effect(KILL cr);
9092 9135
9093 9136 size(2);
9094 9137 format %{ "XOR $dst,$src" %}
9095 9138 opcode(0x33);
9096 9139 ins_encode( OpcP, RegReg( dst, src) );
9097 9140 ins_pipe( ialu_reg_reg );
9098 9141 %}
9099 9142
9100 9143 // Xor Register with Immediate -1
// x ^ -1 is one's complement: encode as the 2-byte NOT r32 instead of a
// 6-byte XOR r32,imm32.  NOT does not modify EFLAGS, so no KILL cr is
// needed (unlike the other XorI rules).
9101 9144 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9102 9145   match(Set dst (XorI dst imm));  
9103 9146 
9104 9147   size(2);
9105 9148   format %{ "NOT    $dst" %}  
9106 9149   ins_encode %{
9107 9150      __ notl($dst$$Register);
9108 9151   %}
9109 9152   ins_pipe( ialu_reg );
9110 9153 %}
9111 9154
9112 9155 // Xor Register with Immediate
9113 9156 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9114 9157 match(Set dst (XorI dst src));
9115 9158 effect(KILL cr);
9116 9159
9117 9160 format %{ "XOR $dst,$src" %}
9118 9161 opcode(0x81,0x06); /* Opcode 81 /6 id */
9119 9162 // ins_encode( RegImm( dst, src) );
9120 9163 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9121 9164 ins_pipe( ialu_reg );
9122 9165 %}
9123 9166
9124 9167 // Xor Register with Memory
9125 9168 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9126 9169 match(Set dst (XorI dst (LoadI src)));
9127 9170 effect(KILL cr);
9128 9171
9129 9172 ins_cost(125);
9130 9173 format %{ "XOR $dst,$src" %}
9131 9174 opcode(0x33);
9132 9175 ins_encode( OpcP, RegMem(dst, src) );
9133 9176 ins_pipe( ialu_reg_mem );
9134 9177 %}
9135 9178
9136 9179 // Xor Memory with Register
9137 9180 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9138 9181 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9139 9182 effect(KILL cr);
9140 9183
9141 9184 ins_cost(150);
9142 9185 format %{ "XOR $dst,$src" %}
9143 9186 opcode(0x31); /* Opcode 31 /r */
9144 9187 ins_encode( OpcP, RegMem( src, dst ) );
9145 9188 ins_pipe( ialu_mem_reg );
9146 9189 %}
9147 9190
9148 9191 // Xor Memory with Immediate
9149 9192 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9150 9193 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9151 9194 effect(KILL cr);
9152 9195
9153 9196 ins_cost(125);
9154 9197 format %{ "XOR $dst,$src" %}
9155 9198 opcode(0x81,0x6); /* Opcode 81 /6 id */
9156 9199 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9157 9200 ins_pipe( ialu_mem_imm );
9158 9201 %}
9159 9202
9160 9203 //----------Convert Int to Boolean---------------------------------------------
9161 9204
9162 9205 instruct movI_nocopy(eRegI dst, eRegI src) %{
9163 9206 effect( DEF dst, USE src );
9164 9207 format %{ "MOV $dst,$src" %}
9165 9208 ins_encode( enc_Copy( dst, src) );
9166 9209 ins_pipe( ialu_reg_reg );
9167 9210 %}
9168 9211
9169 9212 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9170 9213 effect( USE_DEF dst, USE src, KILL cr );
9171 9214
9172 9215 size(4);
9173 9216 format %{ "NEG $dst\n\t"
9174 9217 "ADC $dst,$src" %}
9175 9218 ins_encode( neg_reg(dst),
9176 9219 OpcRegReg(0x13,dst,src) );
9177 9220 ins_pipe( ialu_reg_reg_long );
9178 9221 %}
9179 9222
9180 9223 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9181 9224 match(Set dst (Conv2B src));
9182 9225
9183 9226 expand %{
9184 9227 movI_nocopy(dst,src);
9185 9228 ci2b(dst,src,cr);
9186 9229 %}
9187 9230 %}
9188 9231
9189 9232 instruct movP_nocopy(eRegI dst, eRegP src) %{
9190 9233 effect( DEF dst, USE src );
9191 9234 format %{ "MOV $dst,$src" %}
9192 9235 ins_encode( enc_Copy( dst, src) );
9193 9236 ins_pipe( ialu_reg_reg );
9194 9237 %}
9195 9238
9196 9239 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9197 9240 effect( USE_DEF dst, USE src, KILL cr );
9198 9241 format %{ "NEG $dst\n\t"
9199 9242 "ADC $dst,$src" %}
9200 9243 ins_encode( neg_reg(dst),
9201 9244 OpcRegReg(0x13,dst,src) );
9202 9245 ins_pipe( ialu_reg_reg_long );
9203 9246 %}
9204 9247
9205 9248 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9206 9249 match(Set dst (Conv2B src));
9207 9250
9208 9251 expand %{
9209 9252 movP_nocopy(dst,src);
9210 9253 cp2b(dst,src,cr);
9211 9254 %}
9212 9255 %}
9213 9256
// Produce an all-ones/all-zeros mask: dst = (p < q) ? -1 : 0, branch-free.
// XOR pre-clears dst (SETlt writes only the low byte), CMP sets flags,
// SETlt makes dst 0/1, and NEG turns 1 into -1.
9214 9257 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9215 9258   match(Set dst (CmpLTMask p q));
9216 9259   effect( KILL cr );
9217 9260   ins_cost(400);
9218 9261 
9219 9262   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9220 9263   format %{ "XOR    $dst,$dst\n\t"
9221 9264             "CMP    $p,$q\n\t"
9222 9265             "SETlt  $dst\n\t"
9223 9266             "NEG    $dst" %}
9224 9267   ins_encode( OpcRegReg(0x33,dst,dst),
9225 9268               OpcRegReg(0x3B,p,q),
9226 9269               setLT_reg(dst), neg_reg(dst) );
9227 9270   ins_pipe( pipe_slow );
9228 9271 %}
9229 9272
// Special case mask-vs-zero: (dst < 0) ? -1 : 0 is just an arithmetic
// shift right by 31, which smears the sign bit across the whole register.
9230 9273 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9231 9274   match(Set dst (CmpLTMask dst zero));
9232 9275   effect( DEF dst, KILL cr );
9233 9276   ins_cost(100);
9234 9277 
9235 9278   format %{ "SAR    $dst,31" %}
9236 9279   opcode(0xC1, 0x7);  /* C1 /7 ib */
9237 9280   ins_encode( RegOpcImm( dst, 0x1F ) );
9238 9281   ins_pipe( ialu_reg );
9239 9282 %}
9240 9283
9241 9284
// Branch-free conditional add: p = (p - q) + ((p < q) ? y : 0).
// SUB sets CF when p < q; SBB ECX,ECX then yields -1 or 0, which ANDed
// with y selects y or 0.  The scratch mask lives in ECX (eCXRegI tmp),
// so p, q and y must all avoid ECX (ncxRegI).
9242 9285 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9243 9286   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9244 9287   effect( KILL tmp, KILL cr );
9245 9288   ins_cost(400);
9246 9289   // annoyingly, $tmp has no edges so you cant ask for it in
9247 9290   // any format or encoding
9248 9291   format %{ "SUB    $p,$q\n\t"
9249 9292             "SBB    ECX,ECX\n\t"
9250 9293             "AND    ECX,$y\n\t"
9251 9294             "ADD    $p,ECX" %}
9252 9295   ins_encode( enc_cmpLTP(p,q,y,tmp) );
9253 9296   ins_pipe( pipe_cmplt );
9254 9297 %}
9255 9298
9256 9299 /* If I enable this, I encourage spilling in the inner loop of compress.
9257 9300 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9258 9301 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9259 9302 effect( USE_KILL tmp, KILL cr );
9260 9303 ins_cost(400);
9261 9304
9262 9305 format %{ "SUB $p,$q\n\t"
9263 9306 "SBB ECX,ECX\n\t"
9264 9307 "AND ECX,$y\n\t"
9265 9308 "ADD $p,ECX" %}
9266 9309 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9267 9310 %}
9268 9311 */
9269 9312
9270 9313 //----------Long Instructions------------------------------------------------
9271 9314 // Add Long Register with Register
9272 9315 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9273 9316 match(Set dst (AddL dst src));
9274 9317 effect(KILL cr);
9275 9318 ins_cost(200);
9276 9319 format %{ "ADD $dst.lo,$src.lo\n\t"
9277 9320 "ADC $dst.hi,$src.hi" %}
9278 9321 opcode(0x03, 0x13);
9279 9322 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9280 9323 ins_pipe( ialu_reg_reg_long );
9281 9324 %}
9282 9325
9283 9326 // Add Long Register with Immediate
9284 9327 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9285 9328 match(Set dst (AddL dst src));
9286 9329 effect(KILL cr);
9287 9330 format %{ "ADD $dst.lo,$src.lo\n\t"
9288 9331 "ADC $dst.hi,$src.hi" %}
9289 9332 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9290 9333 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9291 9334 ins_pipe( ialu_reg_long );
9292 9335 %}
9293 9336
9294 9337 // Add Long Register with Memory
9295 9338 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9296 9339 match(Set dst (AddL dst (LoadL mem)));
9297 9340 effect(KILL cr);
9298 9341 ins_cost(125);
9299 9342 format %{ "ADD $dst.lo,$mem\n\t"
9300 9343 "ADC $dst.hi,$mem+4" %}
9301 9344 opcode(0x03, 0x13);
9302 9345 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9303 9346 ins_pipe( ialu_reg_long_mem );
9304 9347 %}
9305 9348
9306 9349 // Subtract Long Register with Register.
9307 9350 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9308 9351 match(Set dst (SubL dst src));
9309 9352 effect(KILL cr);
9310 9353 ins_cost(200);
9311 9354 format %{ "SUB $dst.lo,$src.lo\n\t"
9312 9355 "SBB $dst.hi,$src.hi" %}
9313 9356 opcode(0x2B, 0x1B);
9314 9357 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9315 9358 ins_pipe( ialu_reg_reg_long );
9316 9359 %}
9317 9360
9318 9361 // Subtract Long Register with Immediate
9319 9362 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9320 9363 match(Set dst (SubL dst src));
9321 9364 effect(KILL cr);
9322 9365 format %{ "SUB $dst.lo,$src.lo\n\t"
9323 9366 "SBB $dst.hi,$src.hi" %}
9324 9367 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9325 9368 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9326 9369 ins_pipe( ialu_reg_long );
9327 9370 %}
9328 9371
9329 9372 // Subtract Long Register with Memory
9330 9373 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9331 9374 match(Set dst (SubL dst (LoadL mem)));
9332 9375 effect(KILL cr);
9333 9376 ins_cost(125);
9334 9377 format %{ "SUB $dst.lo,$mem\n\t"
9335 9378 "SBB $dst.hi,$mem+4" %}
9336 9379 opcode(0x2B, 0x1B);
9337 9380 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9338 9381 ins_pipe( ialu_reg_long_mem );
9339 9382 %}
9340 9383
9341 9384 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9342 9385 match(Set dst (SubL zero dst));
9343 9386 effect(KILL cr);
9344 9387 ins_cost(300);
9345 9388 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9346 9389 ins_encode( neg_long(dst) );
9347 9390 ins_pipe( ialu_reg_reg_long );
9348 9391 %}
9349 9392
9350 9393 // And Long Register with Register
9351 9394 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9352 9395 match(Set dst (AndL dst src));
9353 9396 effect(KILL cr);
9354 9397 format %{ "AND $dst.lo,$src.lo\n\t"
9355 9398 "AND $dst.hi,$src.hi" %}
9356 9399 opcode(0x23,0x23);
9357 9400 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9358 9401 ins_pipe( ialu_reg_reg_long );
9359 9402 %}
9360 9403
9361 9404 // And Long Register with Immediate
9362 9405 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9363 9406 match(Set dst (AndL dst src));
9364 9407 effect(KILL cr);
9365 9408 format %{ "AND $dst.lo,$src.lo\n\t"
9366 9409 "AND $dst.hi,$src.hi" %}
9367 9410 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9368 9411 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9369 9412 ins_pipe( ialu_reg_long );
9370 9413 %}
9371 9414
9372 9415 // And Long Register with Memory
9373 9416 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9374 9417 match(Set dst (AndL dst (LoadL mem)));
9375 9418 effect(KILL cr);
9376 9419 ins_cost(125);
9377 9420 format %{ "AND $dst.lo,$mem\n\t"
9378 9421 "AND $dst.hi,$mem+4" %}
9379 9422 opcode(0x23, 0x23);
9380 9423 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9381 9424 ins_pipe( ialu_reg_long_mem );
9382 9425 %}
9383 9426
9384 9427 // Or Long Register with Register
9385 9428 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9386 9429 match(Set dst (OrL dst src));
9387 9430 effect(KILL cr);
9388 9431 format %{ "OR $dst.lo,$src.lo\n\t"
9389 9432 "OR $dst.hi,$src.hi" %}
9390 9433 opcode(0x0B,0x0B);
9391 9434 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9392 9435 ins_pipe( ialu_reg_reg_long );
9393 9436 %}
9394 9437
9395 9438 // Or Long Register with Immediate
9396 9439 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9397 9440 match(Set dst (OrL dst src));
9398 9441 effect(KILL cr);
9399 9442 format %{ "OR $dst.lo,$src.lo\n\t"
9400 9443 "OR $dst.hi,$src.hi" %}
9401 9444 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9402 9445 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9403 9446 ins_pipe( ialu_reg_long );
9404 9447 %}
9405 9448
9406 9449 // Or Long Register with Memory
9407 9450 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9408 9451 match(Set dst (OrL dst (LoadL mem)));
9409 9452 effect(KILL cr);
9410 9453 ins_cost(125);
9411 9454 format %{ "OR $dst.lo,$mem\n\t"
9412 9455 "OR $dst.hi,$mem+4" %}
9413 9456 opcode(0x0B,0x0B);
9414 9457 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9415 9458 ins_pipe( ialu_reg_long_mem );
9416 9459 %}
9417 9460
9418 9461 // Xor Long Register with Register
9419 9462 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9420 9463 match(Set dst (XorL dst src));
9421 9464 effect(KILL cr);
9422 9465 format %{ "XOR $dst.lo,$src.lo\n\t"
9423 9466 "XOR $dst.hi,$src.hi" %}
9424 9467 opcode(0x33,0x33);
9425 9468 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9426 9469 ins_pipe( ialu_reg_reg_long );
9427 9470 %}
9428 9471
9429 9472 // Xor Long Register with Immediate -1
// Long ^ -1 is one's complement of both halves: NOT the low word and, via
// HIGH_FROM_LOW (maps a long's low-half register to its paired high-half
// register), NOT the high word.  NOT leaves EFLAGS alone, so no KILL cr.
9430 9473 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9431 9474   match(Set dst (XorL dst imm));  
9432 9475   format %{ "NOT    $dst.lo\n\t"
9433 9476             "NOT    $dst.hi" %}
9434 9477   ins_encode %{
9435 9478      __ notl($dst$$Register);
9436 9479      __ notl(HIGH_FROM_LOW($dst$$Register));
9437 9480   %}
9438 9481   ins_pipe( ialu_reg_long );
9439 9482 %}
9440 9483
9441 9484 // Xor Long Register with Immediate
9442 9485 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9443 9486 match(Set dst (XorL dst src));
9444 9487 effect(KILL cr);
9445 9488 format %{ "XOR $dst.lo,$src.lo\n\t"
9446 9489 "XOR $dst.hi,$src.hi" %}
9447 9490 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9448 9491 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9449 9492 ins_pipe( ialu_reg_long );
9450 9493 %}
9451 9494
9452 9495 // Xor Long Register with Memory
9453 9496 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9454 9497 match(Set dst (XorL dst (LoadL mem)));
9455 9498 effect(KILL cr);
9456 9499 ins_cost(125);
9457 9500 format %{ "XOR $dst.lo,$mem\n\t"
9458 9501 "XOR $dst.hi,$mem+4" %}
9459 9502 opcode(0x33,0x33);
9460 9503 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9461 9504 ins_pipe( ialu_reg_long_mem );
9462 9505 %}
9463 9506
9464 9507 // Shift Left Long by 1
9465 9508 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9466 9509 predicate(UseNewLongLShift);
9467 9510 match(Set dst (LShiftL dst cnt));
9468 9511 effect(KILL cr);
9469 9512 ins_cost(100);
9470 9513 format %{ "ADD $dst.lo,$dst.lo\n\t"
9471 9514 "ADC $dst.hi,$dst.hi" %}
9472 9515 ins_encode %{
9473 9516 __ addl($dst$$Register,$dst$$Register);
9474 9517 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9475 9518 %}
9476 9519 ins_pipe( ialu_reg_long );
9477 9520 %}
9478 9521
9479 9522 // Shift Left Long by 2
9480 9523 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9481 9524 predicate(UseNewLongLShift);
9482 9525 match(Set dst (LShiftL dst cnt));
9483 9526 effect(KILL cr);
9484 9527 ins_cost(100);
9485 9528 format %{ "ADD $dst.lo,$dst.lo\n\t"
9486 9529 "ADC $dst.hi,$dst.hi\n\t"
9487 9530 "ADD $dst.lo,$dst.lo\n\t"
9488 9531 "ADC $dst.hi,$dst.hi" %}
9489 9532 ins_encode %{
9490 9533 __ addl($dst$$Register,$dst$$Register);
9491 9534 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9492 9535 __ addl($dst$$Register,$dst$$Register);
9493 9536 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9494 9537 %}
9495 9538 ins_pipe( ialu_reg_long );
9496 9539 %}
9497 9540
9498 9541 // Shift Left Long by 3
9499 9542 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9500 9543 predicate(UseNewLongLShift);
9501 9544 match(Set dst (LShiftL dst cnt));
9502 9545 effect(KILL cr);
9503 9546 ins_cost(100);
9504 9547 format %{ "ADD $dst.lo,$dst.lo\n\t"
9505 9548 "ADC $dst.hi,$dst.hi\n\t"
9506 9549 "ADD $dst.lo,$dst.lo\n\t"
9507 9550 "ADC $dst.hi,$dst.hi\n\t"
9508 9551 "ADD $dst.lo,$dst.lo\n\t"
9509 9552 "ADC $dst.hi,$dst.hi" %}
9510 9553 ins_encode %{
9511 9554 __ addl($dst$$Register,$dst$$Register);
9512 9555 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9513 9556 __ addl($dst$$Register,$dst$$Register);
9514 9557 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9515 9558 __ addl($dst$$Register,$dst$$Register);
9516 9559 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9517 9560 %}
9518 9561 ins_pipe( ialu_reg_long );
9519 9562 %}
9520 9563
9521 9564 // Shift Left Long by 1-31
9522 9565 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9523 9566 match(Set dst (LShiftL dst cnt));
9524 9567 effect(KILL cr);
9525 9568 ins_cost(200);
9526 9569 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9527 9570 "SHL $dst.lo,$cnt" %}
9528 9571 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9529 9572 ins_encode( move_long_small_shift(dst,cnt) );
9530 9573 ins_pipe( ialu_reg_long );
9531 9574 %}
9532 9575
9533 9576 // Shift Left Long by 32-63
9534 9577 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9535 9578 match(Set dst (LShiftL dst cnt));
9536 9579 effect(KILL cr);
9537 9580 ins_cost(300);
9538 9581 format %{ "MOV $dst.hi,$dst.lo\n"
9539 9582 "\tSHL $dst.hi,$cnt-32\n"
9540 9583 "\tXOR $dst.lo,$dst.lo" %}
9541 9584 opcode(0xC1, 0x4); /* C1 /4 ib */
9542 9585 ins_encode( move_long_big_shift_clr(dst,cnt) );
9543 9586 ins_pipe( ialu_reg_long );
9544 9587 %}
9545 9588
9546 9589 // Shift Left Long by variable
// x86 shift/SHLD instructions mask the CL count to 5 bits, so a 64-bit
// shift by 32..63 cannot be done directly.  When bit 5 of the count is set
// (TEST $shift,32), first move lo into hi and zero lo; the following
// SHLD/SHL pair then completes the shift using count mod 32.
9547 9590 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9548 9591   match(Set dst (LShiftL dst shift));
9549 9592   effect(KILL cr);
9550 9593   ins_cost(500+200);
9551 9594   size(17);
9552 9595   format %{ "TEST   $shift,32\n\t"
9553 9596             "JEQ,s  small\n\t"
9554 9597             "MOV    $dst.hi,$dst.lo\n\t"
9555 9598             "XOR    $dst.lo,$dst.lo\n"
9556 9599     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9557 9600             "SHL    $dst.lo,$shift" %}
9558 9601   ins_encode( shift_left_long( dst, shift ) );
9559 9602   ins_pipe( pipe_slow );
9560 9603 %}
9561 9604
9562 9605 // Shift Right Long by 1-31
9563 9606 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9564 9607 match(Set dst (URShiftL dst cnt));
9565 9608 effect(KILL cr);
9566 9609 ins_cost(200);
9567 9610 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9568 9611 "SHR $dst.hi,$cnt" %}
9569 9612 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9570 9613 ins_encode( move_long_small_shift(dst,cnt) );
9571 9614 ins_pipe( ialu_reg_long );
9572 9615 %}
9573 9616
9574 9617 // Shift Right Long by 32-63
9575 9618 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9576 9619 match(Set dst (URShiftL dst cnt));
9577 9620 effect(KILL cr);
9578 9621 ins_cost(300);
9579 9622 format %{ "MOV $dst.lo,$dst.hi\n"
9580 9623 "\tSHR $dst.lo,$cnt-32\n"
9581 9624 "\tXOR $dst.hi,$dst.hi" %}
9582 9625 opcode(0xC1, 0x5); /* C1 /5 ib */
9583 9626 ins_encode( move_long_big_shift_clr(dst,cnt) );
9584 9627 ins_pipe( ialu_reg_long );
9585 9628 %}
9586 9629
9587 9630 // Shift Right Long by variable
9588 9631 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9589 9632 match(Set dst (URShiftL dst shift));
9590 9633 effect(KILL cr);
9591 9634 ins_cost(600);
9592 9635 size(17);
9593 9636 format %{ "TEST $shift,32\n\t"
9594 9637 "JEQ,s small\n\t"
9595 9638 "MOV $dst.lo,$dst.hi\n\t"
9596 9639 "XOR $dst.hi,$dst.hi\n"
9597 9640 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9598 9641 "SHR $dst.hi,$shift" %}
9599 9642 ins_encode( shift_right_long( dst, shift ) );
9600 9643 ins_pipe( pipe_slow );
9601 9644 %}
9602 9645
9603 9646 // Shift Right Long by 1-31
9604 9647 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9605 9648 match(Set dst (RShiftL dst cnt));
9606 9649 effect(KILL cr);
9607 9650 ins_cost(200);
9608 9651 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9609 9652 "SAR $dst.hi,$cnt" %}
9610 9653 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9611 9654 ins_encode( move_long_small_shift(dst,cnt) );
9612 9655 ins_pipe( ialu_reg_long );
9613 9656 %}
9614 9657
9615 9658 // Shift Right Long by 32-63
9616 9659 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9617 9660 match(Set dst (RShiftL dst cnt));
9618 9661 effect(KILL cr);
9619 9662 ins_cost(300);
9620 9663 format %{ "MOV $dst.lo,$dst.hi\n"
9621 9664 "\tSAR $dst.lo,$cnt-32\n"
9622 9665 "\tSAR $dst.hi,31" %}
9623 9666 opcode(0xC1, 0x7); /* C1 /7 ib */
9624 9667 ins_encode( move_long_big_shift_sign(dst,cnt) );
9625 9668 ins_pipe( ialu_reg_long );
9626 9669 %}
9627 9670
9628 9671 // Shift Right arithmetic Long by variable
9629 9672 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9630 9673 match(Set dst (RShiftL dst shift));
9631 9674 effect(KILL cr);
9632 9675 ins_cost(600);
9633 9676 size(18);
9634 9677 format %{ "TEST $shift,32\n\t"
9635 9678 "JEQ,s small\n\t"
9636 9679 "MOV $dst.lo,$dst.hi\n\t"
9637 9680 "SAR $dst.hi,31\n"
9638 9681 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9639 9682 "SAR $dst.hi,$shift" %}
9640 9683 ins_encode( shift_right_arith_long( dst, shift ) );
9641 9684 ins_pipe( pipe_slow );
9642 9685 %}
9643 9686
9644 9687
9645 9688 //----------Double Instructions------------------------------------------------
9646 9689 // Double Math
9647 9690
9648 9691 // Compare & branch
9649 9692
9650 9693 // P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes ZF/PF/CF directly (no FNSTSW/SAHF dance) and pops ST.
// An unordered result (NaN operand) sets PF; the JNP-guarded fixup forces
// CF so NaN compares as "less than".  The fixup presumably goes through
// AH/SAHF (body of cmpF_P6_fixup not visible here), which is why EAX is
// killed -- TODO confirm against the encoding definition.
9651 9694 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9652 9695   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9653 9696   match(Set cr (CmpD src1 src2));
9654 9697   effect(KILL rax);
9655 9698   ins_cost(150);
9656 9699   format %{ "FLD    $src1\n\t"
9657 9700             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9658 9701             "JNP    exit\n\t"
9659 9702             "MOV    ah,1       // saw a NaN, set CF\n\t"
9660 9703             "SAHF\n"
9661 9704      "exit:\tNOP               // avoid branch to branch" %}
9662 9705   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9663 9706   ins_encode( Push_Reg_D(src1),
9664 9707               OpcP, RegOpc(src2),
9665 9708               cmpF_P6_fixup );
9666 9709   ins_pipe( pipe_slow );
9667 9710 %}
9668 9711
9669 9712 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
9670 9713 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9671 9714 match(Set cr (CmpD src1 src2));
9672 9715 ins_cost(150);
9673 9716 format %{ "FLD $src1\n\t"
9674 9717 "FUCOMIP ST,$src2 // P6 instruction" %}
9675 9718 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9676 9719 ins_encode( Push_Reg_D(src1),
9677 9720 OpcP, RegOpc(src2));
9678 9721 ins_pipe( pipe_slow );
9679 9722 %}
9680 9723
9681 9724 // Compare & branch
9682 9725 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9683 9726 predicate(UseSSE<=1);
9684 9727 match(Set cr (CmpD src1 src2));
9685 9728 effect(KILL rax);
9686 9729 ins_cost(200);
9687 9730 format %{ "FLD $src1\n\t"
9688 9731 "FCOMp $src2\n\t"
9689 9732 "FNSTSW AX\n\t"
9690 9733 "TEST AX,0x400\n\t"
9691 9734 "JZ,s flags\n\t"
9692 9735 "MOV AH,1\t# unordered treat as LT\n"
9693 9736 "flags:\tSAHF" %}
9694 9737 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9695 9738 ins_encode( Push_Reg_D(src1),
9696 9739 OpcP, RegOpc(src2),
9697 9740 fpu_flags);
9698 9741 ins_pipe( pipe_slow );
9699 9742 %}
9700 9743
9701 9744 // Compare vs zero into -1,0,1
9702 9745 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
9703 9746 predicate(UseSSE<=1);
9704 9747 match(Set dst (CmpD3 src1 zero));
9705 9748 effect(KILL cr, KILL rax);
9706 9749 ins_cost(280);
9707 9750 format %{ "FTSTD $dst,$src1" %}
9708 9751 opcode(0xE4, 0xD9);
9709 9752 ins_encode( Push_Reg_D(src1),
9710 9753 OpcS, OpcP, PopFPU,
9711 9754 CmpF_Result(dst));
9712 9755 ins_pipe( pipe_slow );
9713 9756 %}
9714 9757
9715 9758 // Compare into -1,0,1
9716 9759 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
9717 9760 predicate(UseSSE<=1);
9718 9761 match(Set dst (CmpD3 src1 src2));
9719 9762 effect(KILL cr, KILL rax);
9720 9763 ins_cost(300);
9721 9764 format %{ "FCMPD $dst,$src1,$src2" %}
9722 9765 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9723 9766 ins_encode( Push_Reg_D(src1),
9724 9767 OpcP, RegOpc(src2),
9725 9768 CmpF_Result(dst));
9726 9769 ins_pipe( pipe_slow );
9727 9770 %}
9728 9771
9729 9772 // float compare and set condition codes in EFLAGS by XMM regs
9730 9773 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
9731 9774 predicate(UseSSE>=2);
9732 9775 match(Set cr (CmpD dst src));
9733 9776 effect(KILL rax);
9734 9777 ins_cost(125);
9735 9778 format %{ "COMISD $dst,$src\n"
9736 9779 "\tJNP exit\n"
9737 9780 "\tMOV ah,1 // saw a NaN, set CF\n"
9738 9781 "\tSAHF\n"
9739 9782 "exit:\tNOP // avoid branch to branch" %}
9740 9783 opcode(0x66, 0x0F, 0x2F);
9741 9784 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
9742 9785 ins_pipe( pipe_slow );
9743 9786 %}
9744 9787
9745 9788 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
9746 9789 predicate(UseSSE>=2);
9747 9790 match(Set cr (CmpD dst src));
9748 9791 ins_cost(100);
9749 9792 format %{ "COMISD $dst,$src" %}
9750 9793 opcode(0x66, 0x0F, 0x2F);
9751 9794 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
9752 9795 ins_pipe( pipe_slow );
9753 9796 %}
9754 9797
9755 9798 // float compare and set condition codes in EFLAGS by XMM regs
9756 9799 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
9757 9800 predicate(UseSSE>=2);
9758 9801 match(Set cr (CmpD dst (LoadD src)));
9759 9802 effect(KILL rax);
9760 9803 ins_cost(145);
9761 9804 format %{ "COMISD $dst,$src\n"
9762 9805 "\tJNP exit\n"
9763 9806 "\tMOV ah,1 // saw a NaN, set CF\n"
9764 9807 "\tSAHF\n"
9765 9808 "exit:\tNOP // avoid branch to branch" %}
9766 9809 opcode(0x66, 0x0F, 0x2F);
9767 9810 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
9768 9811 ins_pipe( pipe_slow );
9769 9812 %}
9770 9813
9771 9814 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
9772 9815 predicate(UseSSE>=2);
9773 9816 match(Set cr (CmpD dst (LoadD src)));
9774 9817 ins_cost(100);
9775 9818 format %{ "COMISD $dst,$src" %}
9776 9819 opcode(0x66, 0x0F, 0x2F);
9777 9820 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
9778 9821 ins_pipe( pipe_slow );
9779 9822 %}
9780 9823
9781 9824 // Compare into -1,0,1 in XMM
9782 9825 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
9783 9826 predicate(UseSSE>=2);
9784 9827 match(Set dst (CmpD3 src1 src2));
9785 9828 effect(KILL cr);
9786 9829 ins_cost(255);
9787 9830 format %{ "XOR $dst,$dst\n"
9788 9831 "\tCOMISD $src1,$src2\n"
9789 9832 "\tJP,s nan\n"
9790 9833 "\tJEQ,s exit\n"
9791 9834 "\tJA,s inc\n"
9792 9835 "nan:\tDEC $dst\n"
9793 9836 "\tJMP,s exit\n"
9794 9837 "inc:\tINC $dst\n"
9795 9838 "exit:"
9796 9839 %}
9797 9840 opcode(0x66, 0x0F, 0x2F);
9798 9841 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
9799 9842 CmpX_Result(dst));
9800 9843 ins_pipe( pipe_slow );
9801 9844 %}
9802 9845
9803 9846 // Compare into -1,0,1 in XMM and memory
9804 9847 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
9805 9848 predicate(UseSSE>=2);
9806 9849 match(Set dst (CmpD3 src1 (LoadD mem)));
9807 9850 effect(KILL cr);
9808 9851 ins_cost(275);
9809 9852 format %{ "COMISD $src1,$mem\n"
9810 9853 "\tMOV $dst,0\t\t# do not blow flags\n"
9811 9854 "\tJP,s nan\n"
9812 9855 "\tJEQ,s exit\n"
9813 9856 "\tJA,s inc\n"
9814 9857 "nan:\tDEC $dst\n"
9815 9858 "\tJMP,s exit\n"
9816 9859 "inc:\tINC $dst\n"
9817 9860 "exit:"
9818 9861 %}
9819 9862 opcode(0x66, 0x0F, 0x2F);
9820 9863 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
9821 9864 LdImmI(dst,0x0), CmpX_Result(dst));
9822 9865 ins_pipe( pipe_slow );
9823 9866 %}
9824 9867
9825 9868
9826 9869 instruct subD_reg(regD dst, regD src) %{
9827 9870 predicate (UseSSE <=1);
9828 9871 match(Set dst (SubD dst src));
9829 9872
9830 9873 format %{ "FLD $src\n\t"
9831 9874 "DSUBp $dst,ST" %}
9832 9875 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9833 9876 ins_cost(150);
9834 9877 ins_encode( Push_Reg_D(src),
9835 9878 OpcP, RegOpc(dst) );
9836 9879 ins_pipe( fpu_reg_reg );
9837 9880 %}
9838 9881
9839 9882 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9840 9883 predicate (UseSSE <=1);
9841 9884 match(Set dst (RoundDouble (SubD src1 src2)));
9842 9885 ins_cost(250);
9843 9886
9844 9887 format %{ "FLD $src2\n\t"
9845 9888 "DSUB ST,$src1\n\t"
9846 9889 "FSTP_D $dst\t# D-round" %}
9847 9890 opcode(0xD8, 0x5);
9848 9891 ins_encode( Push_Reg_D(src2),
9849 9892 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9850 9893 ins_pipe( fpu_mem_reg_reg );
9851 9894 %}
9852 9895
9853 9896
9854 9897 instruct subD_reg_mem(regD dst, memory src) %{
9855 9898 predicate (UseSSE <=1);
9856 9899 match(Set dst (SubD dst (LoadD src)));
9857 9900 ins_cost(150);
9858 9901
9859 9902 format %{ "FLD $src\n\t"
9860 9903 "DSUBp $dst,ST" %}
9861 9904 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9862 9905 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9863 9906 OpcP, RegOpc(dst) );
9864 9907 ins_pipe( fpu_reg_mem );
9865 9908 %}
9866 9909
9867 9910 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
9868 9911 predicate (UseSSE<=1);
9869 9912 match(Set dst (AbsD src));
9870 9913 ins_cost(100);
9871 9914 format %{ "FABS" %}
9872 9915 opcode(0xE1, 0xD9);
9873 9916 ins_encode( OpcS, OpcP );
9874 9917 ins_pipe( fpu_reg_reg );
9875 9918 %}
9876 9919
// Double absolute value in an XMM register: clear the sign bit by ANDing
// with a 0x7FFF... mask (encoding supplied by AbsXD_encoding).
9877 9920 instruct absXD_reg( regXD dst ) %{
9878 9921 predicate(UseSSE>=2);
9879 9922 match(Set dst (AbsD dst));
9880 9923 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9881 9924 ins_encode( AbsXD_encoding(dst));
9882 9925 ins_pipe( pipe_slow );
9883 9926 %}
9884 9927
9885 9928 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9886 9929 predicate(UseSSE<=1);
9887 9930 match(Set dst (NegD src));
9888 9931 ins_cost(100);
9889 9932 format %{ "FCHS" %}
9890 9933 opcode(0xE0, 0xD9);
9891 9934 ins_encode( OpcS, OpcP );
9892 9935 ins_pipe( fpu_reg_reg );
9893 9936 %}
9894 9937
// Double negate in an XMM register: flip the sign bit by XORing with the
// 0x8000... mask held in double_signflip_pool.
9895 9938 instruct negXD_reg( regXD dst ) %{
9896 9939 predicate(UseSSE>=2);
9897 9940 match(Set dst (NegD dst));
9898 9941 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9899 9942 ins_encode %{
9900 9943 __ xorpd($dst$$XMMRegister,
9901 9944 ExternalAddress((address)double_signflip_pool));
9902 9945 %}
9903 9946 ins_pipe( pipe_slow );
9904 9947 %}
9905 9948
// Double add, x87 path: FLD src then add-and-pop into dst (DE C0+i).
9906 9949 instruct addD_reg(regD dst, regD src) %{
9907 9950 predicate(UseSSE<=1); // x87 only; SSE2 path is addXD_reg
9908 9951 match(Set dst (AddD dst src));
9909 9952 format %{ "FLD $src\n\t"
9910 9953 "DADD $dst,ST" %}
9911 9954 size(4); // FLD (2 bytes) + FADDP (2 bytes)
9912 9955 ins_cost(150);
9913 9956 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9914 9957 ins_encode( Push_Reg_D(src),
9915 9958 OpcP, RegOpc(dst) );
9916 9959 ins_pipe( fpu_reg_reg );
9917 9960 %}
9918 9961
9919 9962
9920 9963 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9921 9964 predicate(UseSSE<=1);
9922 9965 match(Set dst (RoundDouble (AddD src1 src2)));
9923 9966 ins_cost(250);
9924 9967
9925 9968 format %{ "FLD $src2\n\t"
9926 9969 "DADD ST,$src1\n\t"
9927 9970 "FSTP_D $dst\t# D-round" %}
9928 9971 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9929 9972 ins_encode( Push_Reg_D(src2),
9930 9973 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9931 9974 ins_pipe( fpu_mem_reg_reg );
9932 9975 %}
9933 9976
9934 9977
9935 9978 instruct addD_reg_mem(regD dst, memory src) %{
9936 9979 predicate(UseSSE<=1);
9937 9980 match(Set dst (AddD dst (LoadD src)));
9938 9981 ins_cost(150);
9939 9982
9940 9983 format %{ "FLD $src\n\t"
9941 9984 "DADDp $dst,ST" %}
9942 9985 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9943 9986 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9944 9987 OpcP, RegOpc(dst) );
9945 9988 ins_pipe( fpu_reg_mem );
9946 9989 %}
9947 9990
9948 9991 // add-to-memory
9949 9992 instruct addD_mem_reg(memory dst, regD src) %{
9950 9993 predicate(UseSSE<=1);
9951 9994 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9952 9995 ins_cost(150);
9953 9996
9954 9997 format %{ "FLD_D $dst\n\t"
9955 9998 "DADD ST,$src\n\t"
9956 9999 "FST_D $dst" %}
9957 10000 opcode(0xDD, 0x0);
9958 10001 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9959 10002 Opcode(0xD8), RegOpc(src),
9960 10003 set_instruction_start,
9961 10004 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9962 10005 ins_pipe( fpu_reg_mem );
9963 10006 %}
9964 10007
9965 10008 instruct addD_reg_imm1(regD dst, immD1 src) %{
9966 10009 predicate(UseSSE<=1);
9967 10010 match(Set dst (AddD dst src));
9968 10011 ins_cost(125);
9969 10012 format %{ "FLD1\n\t"
9970 10013 "DADDp $dst,ST" %}
9971 10014 opcode(0xDE, 0x00);
9972 10015 ins_encode( LdImmD(src),
9973 10016 OpcP, RegOpc(dst) );
9974 10017 ins_pipe( fpu_reg );
9975 10018 %}
9976 10019
9977 10020 instruct addD_reg_imm(regD dst, immD src) %{
9978 10021 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9979 10022 match(Set dst (AddD dst src));
9980 10023 ins_cost(200);
9981 10024 format %{ "FLD_D [$src]\n\t"
9982 10025 "DADDp $dst,ST" %}
9983 10026 opcode(0xDE, 0x00); /* DE /0 */
9984 10027 ins_encode( LdImmD(src),
9985 10028 OpcP, RegOpc(dst));
9986 10029 ins_pipe( fpu_reg_mem );
9987 10030 %}
9988 10031
9989 10032 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
9990 10033 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9991 10034 match(Set dst (RoundDouble (AddD src con)));
9992 10035 ins_cost(200);
9993 10036 format %{ "FLD_D [$con]\n\t"
9994 10037 "DADD ST,$src\n\t"
9995 10038 "FSTP_D $dst\t# D-round" %}
9996 10039 opcode(0xD8, 0x00); /* D8 /0 */
9997 10040 ins_encode( LdImmD(con),
9998 10041 OpcP, RegOpc(src), Pop_Mem_D(dst));
9999 10042 ins_pipe( fpu_mem_reg_con );
10000 10043 %}
10001 10044
10002 10045 // Add two double precision floating point values in xmm
10003 10046 // reg-reg, reg-constant and reg-memory forms; all emit ADDSD (F2 0F 58).
10003 10046 instruct addXD_reg(regXD dst, regXD src) %{
10004 10047 predicate(UseSSE>=2);
10005 10048 match(Set dst (AddD dst src));
10006 10049 format %{ "ADDSD $dst,$src" %}
10007 10050 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10008 10051 ins_pipe( pipe_slow );
10009 10052 %}
10010 10053 
10011 10054 // Add a constant-pool double: operand is materialized via LdImmXD.
10011 10054 instruct addXD_imm(regXD dst, immXD con) %{
10012 10055 predicate(UseSSE>=2);
10013 10056 match(Set dst (AddD dst con));
10014 10057 format %{ "ADDSD $dst,[$con]" %}
10015 10058 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
10016 10059 ins_pipe( pipe_slow );
10017 10060 %}
10018 10061 
10019 10062 // Add directly from memory (folds the LoadD into the ADDSD).
10019 10062 instruct addXD_mem(regXD dst, memory mem) %{
10020 10063 predicate(UseSSE>=2);
10021 10064 match(Set dst (AddD dst (LoadD mem)));
10022 10065 format %{ "ADDSD $dst,$mem" %}
10023 10066 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10024 10067 ins_pipe( pipe_slow );
10025 10068 %}
10026 10069
10027 10070 // Sub two double precision floating point values in xmm
10028 10071 // reg-reg, reg-constant and reg-memory forms; all emit SUBSD (F2 0F 5C).
10028 10071 instruct subXD_reg(regXD dst, regXD src) %{
10029 10072 predicate(UseSSE>=2);
10030 10073 match(Set dst (SubD dst src));
10031 10074 format %{ "SUBSD $dst,$src" %}
10032 10075 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10033 10076 ins_pipe( pipe_slow );
10034 10077 %}
10035 10078 
10036 10079 // Subtract a constant-pool double loaded through LdImmXD.
10036 10079 instruct subXD_imm(regXD dst, immXD con) %{
10037 10080 predicate(UseSSE>=2);
10038 10081 match(Set dst (SubD dst con));
10039 10082 format %{ "SUBSD $dst,[$con]" %}
10040 10083 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
10041 10084 ins_pipe( pipe_slow );
10042 10085 %}
10043 10086 
10044 10087 // Subtract directly from memory (folds the LoadD into the SUBSD).
10044 10087 instruct subXD_mem(regXD dst, memory mem) %{
10045 10088 predicate(UseSSE>=2);
10046 10089 match(Set dst (SubD dst (LoadD mem)));
10047 10090 format %{ "SUBSD $dst,$mem" %}
10048 10091 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10049 10092 ins_pipe( pipe_slow );
10050 10093 %}
10051 10094
10052 10095 // Mul two double precision floating point values in xmm
10053 10096 // reg-reg, reg-constant and reg-memory forms; all emit MULSD (F2 0F 59).
10053 10096 instruct mulXD_reg(regXD dst, regXD src) %{
10054 10097 predicate(UseSSE>=2);
10055 10098 match(Set dst (MulD dst src));
10056 10099 format %{ "MULSD $dst,$src" %}
10057 10100 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10058 10101 ins_pipe( pipe_slow );
10059 10102 %}
10060 10103 
10061 10104 // Multiply by a constant-pool double loaded through LdImmXD.
10061 10104 instruct mulXD_imm(regXD dst, immXD con) %{
10062 10105 predicate(UseSSE>=2);
10063 10106 match(Set dst (MulD dst con));
10064 10107 format %{ "MULSD $dst,[$con]" %}
10065 10108 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
10066 10109 ins_pipe( pipe_slow );
10067 10110 %}
10068 10111 
10069 10112 // Multiply directly from memory (folds the LoadD into the MULSD).
10069 10112 instruct mulXD_mem(regXD dst, memory mem) %{
10070 10113 predicate(UseSSE>=2);
10071 10114 match(Set dst (MulD dst (LoadD mem)));
10072 10115 format %{ "MULSD $dst,$mem" %}
10073 10116 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10074 10117 ins_pipe( pipe_slow );
10075 10118 %}
10076 10119
10077 10120 // Div two double precision floating point values in xmm
10078 10121 // reg-reg, reg-constant and reg-memory forms; all emit DIVSD (F2 0F 5E).
10078 10121 instruct divXD_reg(regXD dst, regXD src) %{
10079 10122 predicate(UseSSE>=2);
10080 10123 match(Set dst (DivD dst src));
10081 10124 format %{ "DIVSD $dst,$src" %}
10082 10125 opcode(0xF2, 0x0F, 0x5E); // NOTE(review): appears unused — ins_encode below emits the bytes explicitly, and the sibling addXD/subXD/mulXD instructs carry no opcode() line; verify before removing
10083 10126 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10084 10127 ins_pipe( pipe_slow );
10085 10128 %}
10086 10129 
10087 10130 // Divide by a constant-pool double loaded through LdImmXD.
10087 10130 instruct divXD_imm(regXD dst, immXD con) %{
10088 10131 predicate(UseSSE>=2);
10089 10132 match(Set dst (DivD dst con));
10090 10133 format %{ "DIVSD $dst,[$con]" %}
10091 10134 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
10092 10135 ins_pipe( pipe_slow );
10093 10136 %}
10094 10137 
10095 10138 // Divide directly from memory (folds the LoadD into the DIVSD).
10095 10138 instruct divXD_mem(regXD dst, memory mem) %{
10096 10139 predicate(UseSSE>=2);
10097 10140 match(Set dst (DivD dst (LoadD mem)));
10098 10141 format %{ "DIVSD $dst,$mem" %}
10099 10142 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10100 10143 ins_pipe( pipe_slow );
10101 10144 %}
10102 10145
10103 10146
10104 10147 instruct mulD_reg(regD dst, regD src) %{
10105 10148 predicate(UseSSE<=1);
10106 10149 match(Set dst (MulD dst src));
10107 10150 format %{ "FLD $src\n\t"
10108 10151 "DMULp $dst,ST" %}
10109 10152 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10110 10153 ins_cost(150);
10111 10154 ins_encode( Push_Reg_D(src),
10112 10155 OpcP, RegOpc(dst) );
10113 10156 ins_pipe( fpu_reg_reg );
10114 10157 %}
10115 10158
10116 10159 // Strict FP instruction biases argument before multiply then
10117 10160 // biases result to avoid double rounding of subnormals.
10118 10161 //
10119 10162 // scale arg1 by multiplying arg1 by 2^(-15360)
10120 10163 // load arg2
10121 10164 // multiply scaled arg1 by arg2
10122 10165 // rescale product by 2^(15360)
10123 10166 //
// Strict-FP double multiply: pre-scales dst by the subnormal bias constant,
// multiplies by src, then rescales — avoiding double rounding of subnormals
// (see the comment block above). Only selected for strictfp methods.
10124 10167 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
10125 10168 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10126 10169 match(Set dst (MulD dst src));
10127 10170 ins_cost(1); // Select this instruction for all strict FP double multiplies
10128 10171 
10129 10172 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10130 10173 "DMULp $dst,ST\n\t"
10131 10174 "FLD $src\n\t"
10132 10175 "DMULp $dst,ST\n\t"
10133 10176 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10134 10177 "DMULp $dst,ST\n\t" %}
10135 10178 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10136 10179 ins_encode( strictfp_bias1(dst),
10137 10180 Push_Reg_D(src),
10138 10181 OpcP, RegOpc(dst),
10139 10182 strictfp_bias2(dst) );
10140 10183 ins_pipe( fpu_reg_reg );
10141 10184 %}
10142 10185
10143 10186 instruct mulD_reg_imm(regD dst, immD src) %{
10144 10187 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10145 10188 match(Set dst (MulD dst src));
10146 10189 ins_cost(200);
10147 10190 format %{ "FLD_D [$src]\n\t"
10148 10191 "DMULp $dst,ST" %}
10149 10192 opcode(0xDE, 0x1); /* DE /1 */
10150 10193 ins_encode( LdImmD(src),
10151 10194 OpcP, RegOpc(dst) );
10152 10195 ins_pipe( fpu_reg_mem );
10153 10196 %}
10154 10197
10155 10198
10156 10199 instruct mulD_reg_mem(regD dst, memory src) %{
10157 10200 predicate( UseSSE<=1 );
10158 10201 match(Set dst (MulD dst (LoadD src)));
10159 10202 ins_cost(200);
10160 10203 format %{ "FLD_D $src\n\t"
10161 10204 "DMULp $dst,ST" %}
10162 10205 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
10163 10206 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10164 10207 OpcP, RegOpc(dst) );
10165 10208 ins_pipe( fpu_reg_mem );
10166 10209 %}
10167 10210
10168 10211 //
10169 10212 // Cisc-alternate to reg-reg multiply
10170 10213 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
10171 10214 predicate( UseSSE<=1 );
10172 10215 match(Set dst (MulD src (LoadD mem)));
10173 10216 ins_cost(250);
10174 10217 format %{ "FLD_D $mem\n\t"
10175 10218 "DMUL ST,$src\n\t"
10176 10219 "FSTP_D $dst" %}
10177 10220 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10178 10221 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10179 10222 OpcReg_F(src),
10180 10223 Pop_Reg_D(dst) );
10181 10224 ins_pipe( fpu_reg_reg_mem );
10182 10225 %}
10183 10226
10184 10227
10185 10228 // MACRO3 -- addD a mulD
10186 10229 // This instruction is a '2-address' instruction in that the result goes
10187 10230 // back to src2. This eliminates a move from the macro; possibly the
10188 10231 // register allocator will have to add it back (and maybe not).
10189 10232 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
10190 10233 predicate( UseSSE<=1 );
10191 10234 match(Set src2 (AddD (MulD src0 src1) src2));
10192 10235 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10193 10236 "DMUL ST,$src1\n\t"
10194 10237 "DADDp $src2,ST" %}
10195 10238 ins_cost(250);
10196 10239 opcode(0xDD); /* LoadD DD /0 */
10197 10240 ins_encode( Push_Reg_F(src0),
10198 10241 FMul_ST_reg(src1),
10199 10242 FAddP_reg_ST(src2) );
10200 10243 ins_pipe( fpu_reg_reg_reg );
10201 10244 %}
10202 10245
10203 10246
10204 10247 // MACRO3 -- subD a mulD
10205 10248 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
10206 10249 predicate( UseSSE<=1 );
10207 10250 match(Set src2 (SubD (MulD src0 src1) src2));
10208 10251 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10209 10252 "DMUL ST,$src1\n\t"
10210 10253 "DSUBRp $src2,ST" %}
10211 10254 ins_cost(250);
10212 10255 ins_encode( Push_Reg_F(src0),
10213 10256 FMul_ST_reg(src1),
10214 10257 Opcode(0xDE), Opc_plus(0xE0,src2));
10215 10258 ins_pipe( fpu_reg_reg_reg );
10216 10259 %}
10217 10260
10218 10261
// Double divide, x87 path: FLD src then divide-and-pop into dst (DE F8+i).
10219 10262 instruct divD_reg(regD dst, regD src) %{
10220 10263 predicate( UseSSE<=1 ); // x87 only; SSE2 path is divXD_reg
10221 10264 match(Set dst (DivD dst src));
10222 10265 
10223 10266 format %{ "FLD $src\n\t"
10224 10267 "FDIVp $dst,ST" %}
10225 10268 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10226 10269 ins_cost(150);
10227 10270 ins_encode( Push_Reg_D(src),
10228 10271 OpcP, RegOpc(dst) );
10229 10272 ins_pipe( fpu_reg_reg );
10230 10273 %}
10231 10274
10232 10275 // Strict FP instruction biases argument before division then
10233 10276 // biases result, to avoid double rounding of subnormals.
10234 10277 //
10235 10278 // scale dividend by multiplying dividend by 2^(-15360)
10236 10279 // load divisor
10237 10280 // divide scaled dividend by divisor
10238 10281 // rescale quotient by 2^(15360)
10239 10282 //
// Strict-FP double divide: pre-scales the dividend by the subnormal bias
// constant, divides by src, then rescales — avoiding double rounding of
// subnormals (see the comment block above). Only selected for strictfp methods.
10240 10283 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
// Fixed: the block carried two predicate() statements; the bare
// "UseSSE<=1" one is implied by the combined strict-FP predicate kept
// below, matching the single-predicate form of strictfp_mulD_reg.
10242 10285 match(Set dst (DivD dst src));
10243 10286 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
ins_cost(1); // Fixed: was the octal-style literal 01; select this instruction for all strict FP double divides (cf. strictfp_mulD_reg)
10245 10288 
10246 10289 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10247 10290 "DMULp $dst,ST\n\t"
10248 10291 "FLD $src\n\t"
10249 10292 "FDIVp $dst,ST\n\t"
10250 10293 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10251 10294 "DMULp $dst,ST\n\t" %}
10252 10295 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10253 10296 ins_encode( strictfp_bias1(dst),
10254 10297 Push_Reg_D(src),
10255 10298 OpcP, RegOpc(dst),
10256 10299 strictfp_bias2(dst) );
10257 10300 ins_pipe( fpu_reg_reg );
10258 10301 %}
10259 10302
10260 10303 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10261 10304 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10262 10305 match(Set dst (RoundDouble (DivD src1 src2)));
10263 10306
10264 10307 format %{ "FLD $src1\n\t"
10265 10308 "FDIV ST,$src2\n\t"
10266 10309 "FSTP_D $dst\t# D-round" %}
10267 10310 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10268 10311 ins_encode( Push_Reg_D(src1),
10269 10312 OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10270 10313 ins_pipe( fpu_mem_reg_reg );
10271 10314 %}
10272 10315
10273 10316
10274 10317 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
10275 10318 predicate(UseSSE<=1);
10276 10319 match(Set dst (ModD dst src));
10277 10320 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10278 10321
10279 10322 format %{ "DMOD $dst,$src" %}
10280 10323 ins_cost(250);
10281 10324 ins_encode(Push_Reg_Mod_D(dst, src),
10282 10325 emitModD(),
10283 10326 Push_Result_Mod_D(src),
10284 10327 Pop_Reg_D(dst));
10285 10328 ins_pipe( pipe_slow );
10286 10329 %}
10287 10330
// Double remainder for SSE2: there is no SSE remainder instruction, so the
// operands are spilled to the stack, FPREM is iterated on the x87 unit until
// the partial-remainder flag (C2, bit 0x400 -> parity after SAHF) clears,
// and the result is moved back to an XMM register.
10288 10331 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
10289 10332 predicate(UseSSE>=2);
10290 10333 match(Set dst (ModD src0 src1));
10291 10334 effect(KILL rax, KILL cr); // FNSTSW AX / SAHF clobber EAX and EFLAGS
10292 10335 
10293 10336 format %{ "SUB ESP,8\t # DMOD\n"
10294 10337 "\tMOVSD [ESP+0],$src1\n"
10295 10338 "\tFLD_D [ESP+0]\n"
10296 10339 "\tMOVSD [ESP+0],$src0\n"
10297 10340 "\tFLD_D [ESP+0]\n"
10298 10341 "loop:\tFPREM\n"
10299 10342 "\tFWAIT\n"
10300 10343 "\tFNSTSW AX\n"
10301 10344 "\tSAHF\n"
10302 10345 "\tJP loop\n"
10303 10346 "\tFSTP_D [ESP+0]\n"
10304 10347 "\tMOVSD $dst,[ESP+0]\n"
10305 10348 "\tADD ESP,8\n"
10306 10349 "\tFSTP ST0\t # Restore FPU Stack"
10307 10350 %}
10308 10351 ins_cost(250);
10309 10352 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10310 10353 ins_pipe( pipe_slow );
10311 10354 %}
10312 10355
10313 10356 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
10314 10357 predicate (UseSSE<=1);
10315 10358 match(Set dst (SinD src));
10316 10359 ins_cost(1800);
10317 10360 format %{ "DSIN $dst" %}
10318 10361 opcode(0xD9, 0xFE);
10319 10362 ins_encode( OpcP, OpcS );
10320 10363 ins_pipe( pipe_slow );
10321 10364 %}
10322 10365
// SinD for SSE2: no SSE sine instruction exists, so the XMM value is pushed
// onto the x87 stack, FSIN (D9 FE) is executed, and the result is popped
// back into the XMM register.
10323 10366 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
10324 10367 predicate (UseSSE>=2);
10325 10368 match(Set dst (SinD dst));
10326 10369 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10327 10370 ins_cost(1800);
10328 10371 format %{ "DSIN $dst" %}
10329 10372 opcode(0xD9, 0xFE);
10330 10373 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10331 10374 ins_pipe( pipe_slow );
10332 10375 %}
10333 10376
10334 10377 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
10335 10378 predicate (UseSSE<=1);
10336 10379 match(Set dst (CosD src));
10337 10380 ins_cost(1800);
10338 10381 format %{ "DCOS $dst" %}
10339 10382 opcode(0xD9, 0xFF);
10340 10383 ins_encode( OpcP, OpcS );
10341 10384 ins_pipe( pipe_slow );
10342 10385 %}
10343 10386
10344 10387 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
10345 10388 predicate (UseSSE>=2);
10346 10389 match(Set dst (CosD dst));
10347 10390 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10348 10391 ins_cost(1800);
10349 10392 format %{ "DCOS $dst" %}
10350 10393 opcode(0xD9, 0xFF);
10351 10394 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10352 10395 ins_pipe( pipe_slow );
10353 10396 %}
10354 10397
10355 10398 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
10356 10399 predicate (UseSSE<=1);
10357 10400 match(Set dst(TanD src));
10358 10401 format %{ "DTAN $dst" %}
10359 10402 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10360 10403 Opcode(0xDD), Opcode(0xD8)); // fstp st
10361 10404 ins_pipe( pipe_slow );
10362 10405 %}
10363 10406
10364 10407 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
10365 10408 predicate (UseSSE>=2);
10366 10409 match(Set dst(TanD dst));
10367 10410 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10368 10411 format %{ "DTAN $dst" %}
10369 10412 ins_encode( Push_SrcXD(dst),
10370 10413 Opcode(0xD9), Opcode(0xF2), // fptan
10371 10414 Opcode(0xDD), Opcode(0xD8), // fstp st
10372 10415 Push_ResultXD(dst) );
10373 10416 ins_pipe( pipe_slow );
10374 10417 %}
10375 10418
10376 10419 instruct atanD_reg(regD dst, regD src) %{
10377 10420 predicate (UseSSE<=1);
10378 10421 match(Set dst(AtanD dst src));
10379 10422 format %{ "DATA $dst,$src" %}
10380 10423 opcode(0xD9, 0xF3);
10381 10424 ins_encode( Push_Reg_D(src),
10382 10425 OpcP, OpcS, RegOpc(dst) );
10383 10426 ins_pipe( pipe_slow );
10384 10427 %}
10385 10428
10386 10429 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10387 10430 predicate (UseSSE>=2);
10388 10431 match(Set dst(AtanD dst src));
10389 10432 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10390 10433 format %{ "DATA $dst,$src" %}
10391 10434 opcode(0xD9, 0xF3);
10392 10435 ins_encode( Push_SrcXD(src),
10393 10436 OpcP, OpcS, Push_ResultXD(dst) );
10394 10437 ins_pipe( pipe_slow );
10395 10438 %}
10396 10439
10397 10440 instruct sqrtD_reg(regD dst, regD src) %{
10398 10441 predicate (UseSSE<=1);
10399 10442 match(Set dst (SqrtD src));
10400 10443 format %{ "DSQRT $dst,$src" %}
10401 10444 opcode(0xFA, 0xD9);
10402 10445 ins_encode( Push_Reg_D(src),
10403 10446 OpcS, OpcP, Pop_Reg_D(dst) );
10404 10447 ins_pipe( pipe_slow );
10405 10448 %}
10406 10449
10407 10450 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10408 10451 predicate (UseSSE<=1);
10409 10452 match(Set Y (PowD X Y)); // Raise X to the Yth power
10410 10453 effect(KILL rax, KILL rbx, KILL rcx);
10411 10454 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10412 10455 "FLD_D $X\n\t"
10413 10456 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10414 10457
10415 10458 "FDUP \t\t\t# Q Q\n\t"
10416 10459 "FRNDINT\t\t\t# int(Q) Q\n\t"
10417 10460 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10418 10461 "FISTP dword [ESP]\n\t"
10419 10462 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10420 10463 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10421 10464 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10422 10465 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10423 10466 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10424 10467 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10425 10468 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10426 10469 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10427 10470 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10428 10471 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10429 10472 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10430 10473 "MOV [ESP+0],0\n\t"
10431 10474 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10432 10475
10433 10476 "ADD ESP,8"
10434 10477 %}
10435 10478 ins_encode( push_stack_temp_qword,
10436 10479 Push_Reg_D(X),
10437 10480 Opcode(0xD9), Opcode(0xF1), // fyl2x
10438 10481 pow_exp_core_encoding,
10439 10482 pop_stack_temp_qword);
10440 10483 ins_pipe( pipe_slow );
10441 10484 %}
10442 10485
10443 10486 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10444 10487 predicate (UseSSE>=2);
10445 10488 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10446 10489 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10447 10490 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10448 10491 "MOVSD [ESP],$src1\n\t"
10449 10492 "FLD FPR1,$src1\n\t"
10450 10493 "MOVSD [ESP],$src0\n\t"
10451 10494 "FLD FPR1,$src0\n\t"
10452 10495 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10453 10496
10454 10497 "FDUP \t\t\t# Q Q\n\t"
10455 10498 "FRNDINT\t\t\t# int(Q) Q\n\t"
10456 10499 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10457 10500 "FISTP dword [ESP]\n\t"
10458 10501 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10459 10502 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10460 10503 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10461 10504 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10462 10505 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10463 10506 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10464 10507 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10465 10508 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10466 10509 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10467 10510 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10468 10511 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10469 10512 "MOV [ESP+0],0\n\t"
10470 10513 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10471 10514
10472 10515 "FST_D [ESP]\n\t"
10473 10516 "MOVSD $dst,[ESP]\n\t"
10474 10517 "ADD ESP,8"
10475 10518 %}
10476 10519 ins_encode( push_stack_temp_qword,
10477 10520 push_xmm_to_fpr1(src1),
10478 10521 push_xmm_to_fpr1(src0),
10479 10522 Opcode(0xD9), Opcode(0xF1), // fyl2x
10480 10523 pow_exp_core_encoding,
10481 10524 Push_ResultXD(dst) );
10482 10525 ins_pipe( pipe_slow );
10483 10526 %}
10484 10527
10485 10528
// ExpD on the x87 stack: e^x = 2^(x*log2(e)); FLDL2E/FMULP form Q, then the
// shared pow_exp_core_encoding splits Q into int+frac, computes 2^frac via
// F2XM1 and scales by 2^int(Q) through a hand-marshalled double on the stack.
10486 10529 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10487 10530 predicate (UseSSE<=1);
10488 10531 match(Set dpr1 (ExpD dpr1));
10489 10532 effect(KILL rax, KILL rbx, KILL rcx); // scaling sequence uses EAX/EBX/ECX
// Fixed: first fragment was missing the "\n\t" separator, running the first
// two lines of the debug listing together (cf. expXD_reg's format).
10490 10533 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10491 10534 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10492 10535 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10493 10536 
10494 10537 "FDUP \t\t\t# Q Q\n\t"
10495 10538 "FRNDINT\t\t\t# int(Q) Q\n\t"
10496 10539 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10497 10540 "FISTP dword [ESP]\n\t"
10498 10541 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10499 10542 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10500 10543 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10501 10544 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10502 10545 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10503 10546 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10504 10547 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10505 10548 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10506 10549 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10507 10550 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10508 10551 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10509 10552 "MOV [ESP+0],0\n\t"
10510 10553 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10511 10554 
10512 10555 "ADD ESP,8"
10513 10556 %}
10514 10557 ins_encode( push_stack_temp_qword,
10515 10558 Opcode(0xD9), Opcode(0xEA), // fldl2e
10516 10559 Opcode(0xDE), Opcode(0xC9), // fmulp
10517 10560 pow_exp_core_encoding,
10518 10561 pop_stack_temp_qword);
10519 10562 ins_pipe( pipe_slow );
10520 10563 %}
10521 10564
10522 10565 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10523 10566 predicate (UseSSE>=2);
10524 10567 match(Set dst (ExpD src));
10525 10568 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10526 10569 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10527 10570 "MOVSD [ESP],$src\n\t"
10528 10571 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10529 10572 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10530 10573
10531 10574 "FDUP \t\t\t# Q Q\n\t"
10532 10575 "FRNDINT\t\t\t# int(Q) Q\n\t"
10533 10576 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10534 10577 "FISTP dword [ESP]\n\t"
10535 10578 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10536 10579 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10537 10580 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10538 10581 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10539 10582 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10540 10583 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10541 10584 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10542 10585 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10543 10586 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10544 10587 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10545 10588 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10546 10589 "MOV [ESP+0],0\n\t"
10547 10590 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10548 10591
10549 10592 "FST_D [ESP]\n\t"
10550 10593 "MOVSD $dst,[ESP]\n\t"
10551 10594 "ADD ESP,8"
10552 10595 %}
10553 10596 ins_encode( Push_SrcXD(src),
10554 10597 Opcode(0xD9), Opcode(0xEA), // fldl2e
10555 10598 Opcode(0xDE), Opcode(0xC9), // fmulp
10556 10599 pow_exp_core_encoding,
10557 10600 Push_ResultXD(dst) );
10558 10601 ins_pipe( pipe_slow );
10559 10602 %}
10560 10603
10561 10604
10562 10605
// Log10D on the x87 stack: log10(x) = log10(2) * log2(x), computed as
// FLDLG2 / FXCH / FYL2X with the argument already on the FPU stack.
10563 10606 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
10564 10607 predicate (UseSSE<=1);
10565 10608 // The source Double operand on FPU stack
10566 10609 match(Set dst (Log10D src));
10567 10610 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10568 10611 // fxch ; swap ST(0) with ST(1)
10569 10612 // fyl2x ; compute log_10(2) * log_2(x)
10570 10613 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10571 10614 "FXCH \n\t"
10572 10615 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10573 10616 %}
10574 10617 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10575 10618 Opcode(0xD9), Opcode(0xC9), // fxch
10576 10619 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10577 10620 
10578 10621 ins_pipe( pipe_slow );
10579 10622 %}
10580 10623
10581 10624 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10582 10625 predicate (UseSSE>=2);
10583 10626 effect(KILL cr);
10584 10627 match(Set dst (Log10D src));
10585 10628 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10586 10629 // fyl2x ; compute log_10(2) * log_2(x)
10587 10630 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10588 10631 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10589 10632 %}
10590 10633 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10591 10634 Push_SrcXD(src),
10592 10635 Opcode(0xD9), Opcode(0xF1), // fyl2x
10593 10636 Push_ResultXD(dst));
10594 10637
10595 10638 ins_pipe( pipe_slow );
10596 10639 %}
10597 10640
10598 10641 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
10599 10642 predicate (UseSSE<=1);
10600 10643 // The source Double operand on FPU stack
10601 10644 match(Set dst (LogD src));
10602 10645 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10603 10646 // fxch ; swap ST(0) with ST(1)
10604 10647 // fyl2x ; compute log_e(2) * log_2(x)
10605 10648 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10606 10649 "FXCH \n\t"
10607 10650 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10608 10651 %}
10609 10652 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10610 10653 Opcode(0xD9), Opcode(0xC9), // fxch
10611 10654 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10612 10655
10613 10656 ins_pipe( pipe_slow );
10614 10657 %}
10615 10658
10616 10659 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10617 10660 predicate (UseSSE>=2);
10618 10661 effect(KILL cr);
10619 10662 // The source and result Double operands in XMM registers
10620 10663 match(Set dst (LogD src));
10621 10664 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10622 10665 // fyl2x ; compute log_e(2) * log_2(x)
10623 10666 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10624 10667 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10625 10668 %}
10626 10669 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10627 10670 Push_SrcXD(src),
10628 10671 Opcode(0xD9), Opcode(0xF1), // fyl2x
10629 10672 Push_ResultXD(dst));
10630 10673 ins_pipe( pipe_slow );
10631 10674 %}
10632 10675
10633 10676 //-------------Float Instructions-------------------------------
10634 10677 // Float Math
10635 10678
10636 10679 // Code for float compare:
10637 10680 // fcompp();
10638 10681 // fwait(); fnstsw_ax();
10639 10682 // sahf();
10640 10683 // movl(dst, unordered_result);
10641 10684 // jcc(Assembler::parity, exit);
10642 10685 // movl(dst, less_result);
10643 10686 // jcc(Assembler::below, exit);
10644 10687 // movl(dst, equal_result);
10645 10688 // jcc(Assembler::equal, exit);
10646 10689 // movl(dst, greater_result);
10647 10690 // exit:
10648 10691
10649 10692 // P6 version of float compare, sets condition codes in EFLAGS
// Float compare setting EFLAGS via the P6 FUCOMIP instruction; a fixup
// sequence forces the unordered (NaN) case to read as "less than" by
// setting CF through AH/SAHF.
10650 10693 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10651 10694 predicate(VM_Version::supports_cmov() && UseSSE == 0); // FUCOMIP/CMOV are P6+
10652 10695 match(Set cr (CmpF src1 src2));
10653 10696 effect(KILL rax); // NaN fixup writes AH
10654 10697 ins_cost(150);
10655 10698 format %{ "FLD $src1\n\t"
10656 10699 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10657 10700 "JNP exit\n\t"
10658 10701 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
10659 10702 "SAHF\n"
10660 10703 "exit:\tNOP // avoid branch to branch" %}
10661 10704 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10662 10705 ins_encode( Push_Reg_D(src1), // NOTE(review): double push helper used for a float operand; works on the x87 stack but confirm intentional (same pattern in cmpF_cc_P6CF below)
10663 10706 OpcP, RegOpc(src2),
10664 10707 cmpF_P6_fixup );
10665 10708 ins_pipe( pipe_slow );
10666 10709 %}
10667 10710
10668 10711 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
10669 10712 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10670 10713 match(Set cr (CmpF src1 src2));
10671 10714 ins_cost(100);
10672 10715 format %{ "FLD $src1\n\t"
10673 10716 "FUCOMIP ST,$src2 // P6 instruction" %}
10674 10717 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10675 10718 ins_encode( Push_Reg_D(src1),
10676 10719 OpcP, RegOpc(src2));
10677 10720 ins_pipe( pipe_slow );
10678 10721 %}
10679 10722
10680 10723
10681 10724 // Compare & branch
// Pre-P6 fallback: FCOMP + FNSTSW AX + SAHF to move x87 status into EFLAGS.
// Bit 0x400 of the FPU status word (C2) flags the unordered case, which is
// forced to "less than" before SAHF; EAX is clobbered as the transfer scratch.
10682 10725 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10683 10726   predicate(UseSSE == 0);
10684 10727   match(Set cr (CmpF src1 src2));
10685 10728   effect(KILL rax);
10686 10729   ins_cost(200);
10687 10730   format %{ "FLD $src1\n\t"
10688 10731             "FCOMp $src2\n\t"
10689 10732             "FNSTSW AX\n\t"
10690 10733             "TEST AX,0x400\n\t"
10691 10734             "JZ,s flags\n\t"
10692 10735             "MOV AH,1\t# unordered treat as LT\n"
10693 10736     "flags:\tSAHF" %}
10694 10737   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10695 10738   ins_encode( Push_Reg_D(src1),
10696 10739               OpcP, RegOpc(src2),
10697 10740               fpu_flags);
10698 10741   ins_pipe( pipe_slow );
10699 10742 %}
10700 10743 
10701 10744 // Compare vs zero into -1,0,1
// Three-way compare (CmpF3) against the 0.0 immediate using FTST;
// CmpF_Result materializes -1/0/1 into the integer dst.
10702 10745 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
10703 10746   predicate(UseSSE == 0);
10704 10747   match(Set dst (CmpF3 src1 zero));
10705 10748   effect(KILL cr, KILL rax);
10706 10749   ins_cost(280);
10707 10750   format %{ "FTSTF $dst,$src1" %}
10708 10751   opcode(0xE4, 0xD9);
10709 10752   ins_encode( Push_Reg_D(src1),
10710 10753               OpcS, OpcP, PopFPU,
10711 10754               CmpF_Result(dst));
10712 10755   ins_pipe( pipe_slow );
10713 10756 %}
10714 10757 
10715 10758 // Compare into -1,0,1
// General reg-reg three-way compare; same -1/0/1 materialization as cmpF_0.
10716 10759 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
10717 10760   predicate(UseSSE == 0);
10718 10761   match(Set dst (CmpF3 src1 src2));
10719 10762   effect(KILL cr, KILL rax);
10720 10763   ins_cost(300);
10721 10764   format %{ "FCMPF $dst,$src1,$src2" %}
10722 10765   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10723 10766   ins_encode( Push_Reg_D(src1),
10724 10767               OpcP, RegOpc(src2),
10725 10768               CmpF_Result(dst));
10726 10769   ins_pipe( pipe_slow );
10727 10770 %}
10728 10771
10729 10772 // float compare and set condition codes in EFLAGS by XMM regs
// COMISS sets ZF/PF/CF directly; the unordered result (PF=1) is rewritten to
// "less than" by cmpF_P6_fixup (MOV ah,1 / SAHF), which clobbers EAX.
10730 10773 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
10731 10774   predicate(UseSSE>=1);
10732 10775   match(Set cr (CmpF dst src));
10733 10776   effect(KILL rax);
10734 10777   ins_cost(145);
10735 10778   format %{ "COMISS $dst,$src\n"
10736 10779           "\tJNP exit\n"
10737 10780           "\tMOV ah,1 // saw a NaN, set CF\n"
10738 10781           "\tSAHF\n"
10739 10782      "exit:\tNOP // avoid branch to branch" %}
10740 10783   opcode(0x0F, 0x2F);
10741 10784   ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
10742 10785   ins_pipe( pipe_slow );
10743 10786 %}
10744 10787 
// CF-only consumer variant: no NaN fixup, no EAX kill, lower cost.
10745 10788 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
10746 10789   predicate(UseSSE>=1);
10747 10790   match(Set cr (CmpF dst src));
10748 10791   ins_cost(100);
10749 10792   format %{ "COMISS $dst,$src" %}
10750 10793   opcode(0x0F, 0x2F);
10751 10794   ins_encode(OpcP, OpcS, RegReg(dst, src));
10752 10795   ins_pipe( pipe_slow );
10753 10796 %}
10754 10797 
10755 10798 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: folds the LoadF into COMISS.
10756 10799 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
10757 10800   predicate(UseSSE>=1);
10758 10801   match(Set cr (CmpF dst (LoadF src)));
10759 10802   effect(KILL rax);
10760 10803   ins_cost(165);
10761 10804   format %{ "COMISS $dst,$src\n"
10762 10805           "\tJNP exit\n"
10763 10806           "\tMOV ah,1 // saw a NaN, set CF\n"
10764 10807           "\tSAHF\n"
10765 10808      "exit:\tNOP // avoid branch to branch" %}
10766 10809   opcode(0x0F, 0x2F);
10767 10810   ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
10768 10811   ins_pipe( pipe_slow );
10769 10812 %}
10770 10813 
// CF-only consumer variant of the memory form.
10771 10814 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
10772 10815   predicate(UseSSE>=1);
10773 10816   match(Set cr (CmpF dst (LoadF src)));
10774 10817   ins_cost(100);
10775 10818   format %{ "COMISS $dst,$src" %}
10776 10819   opcode(0x0F, 0x2F);
10777 10820   ins_encode(OpcP, OpcS, RegMem(dst, src));
10778 10821   ins_pipe( pipe_slow );
10779 10822 %}
10780 10823
10781 10824 // Compare into -1,0,1 in XMM
// Three-way compare in SSE: dst is zeroed first, then COMISS plus a small
// branch ladder produces -1 (below or NaN), 0 (equal), or 1 (above).
10782 10825 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
10783 10826   predicate(UseSSE>=1);
10784 10827   match(Set dst (CmpF3 src1 src2));
10785 10828   effect(KILL cr);
10786 10829   ins_cost(255);
10787 10830   format %{ "XOR $dst,$dst\n"
10788 10831           "\tCOMISS $src1,$src2\n"
10789 10832           "\tJP,s nan\n"
10790 10833           "\tJEQ,s exit\n"
10791 10834           "\tJA,s inc\n"
10792 10835       "nan:\tDEC $dst\n"
10793 10836           "\tJMP,s exit\n"
10794 10837       "inc:\tINC $dst\n"
10795 10838      "exit:"
10796 10839   %}
10797 10840   opcode(0x0F, 0x2F);
10798 10841   ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
10799 10842   ins_pipe( pipe_slow );
10800 10843 %}
10801 10844 
10802 10845 // Compare into -1,0,1 in XMM and memory
// Memory form: dst cannot be xor-zeroed before COMISS folds the load, so a
// flag-preserving "MOV $dst,0" is emitted after the compare instead.
10803 10846 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
10804 10847   predicate(UseSSE>=1);
10805 10848   match(Set dst (CmpF3 src1 (LoadF mem)));
10806 10849   effect(KILL cr);
10807 10850   ins_cost(275);
10808 10851   format %{ "COMISS $src1,$mem\n"
10809 10852           "\tMOV $dst,0\t\t# do not blow flags\n"
10810 10853           "\tJP,s nan\n"
10811 10854           "\tJEQ,s exit\n"
10812 10855           "\tJA,s inc\n"
10813 10856       "nan:\tDEC $dst\n"
10814 10857           "\tJMP,s exit\n"
10815 10858       "inc:\tINC $dst\n"
10816 10859      "exit:"
10817 10860   %}
10818 10861   opcode(0x0F, 0x2F);
10819 10862   ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
10820 10863   ins_pipe( pipe_slow );
10821 10864 %}
10822 10865
10823 10866 // Spill to obtain 24-bit precision
// x87 registers compute in 80-bit extended precision; when strict 24-bit
// (single) rounding is selected, the result is stored to a stack slot
// (Pop_Mem_F) to force the rounding.  The *24* variants below implement that;
// their non-24 twins keep the result in a register and skip the round.
10824 10867 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10825 10868   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10826 10869   match(Set dst (SubF src1 src2));
10827 10870 
10828 10871   format %{ "FSUB $dst,$src1 - $src2" %}
10829 10872   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10830 10873   ins_encode( Push_Reg_F(src1),
10831 10874               OpcReg_F(src2),
10832 10875               Pop_Mem_F(dst) );
10833 10876   ins_pipe( fpu_mem_reg_reg );
10834 10877 %}
10835 10878 //
10836 10879 // This instruction does not round to 24-bits
10837 10880 instruct subF_reg(regF dst, regF src) %{
10838 10881   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10839 10882   match(Set dst (SubF dst src));
10840 10883 
10841 10884   format %{ "FSUB $dst,$src" %}
10842 10885   opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10843 10886   ins_encode( Push_Reg_F(src),
10844 10887               OpcP, RegOpc(dst) );
10845 10888   ins_pipe( fpu_reg_reg );
10846 10889 %}
10847 10890 
10848 10891 // Spill to obtain 24-bit precision
10849 10892 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
10850 10893   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10851 10894   match(Set dst (AddF src1 src2));
10852 10895 
10853 10896   format %{ "FADD $dst,$src1,$src2" %}
10854 10897   opcode(0xD8, 0x0); /* D8 C0+i */
10855 10898   ins_encode( Push_Reg_F(src2),
10856 10899               OpcReg_F(src1),
10857 10900               Pop_Mem_F(dst) );
10858 10901   ins_pipe( fpu_mem_reg_reg );
10859 10902 %}
10860 10903 //
10861 10904 // This instruction does not round to 24-bits
10862 10905 instruct addF_reg(regF dst, regF src) %{
10863 10906   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10864 10907   match(Set dst (AddF dst src));
10865 10908 
10866 10909   format %{ "FLD $src\n\t"
10867 10910             "FADDp $dst,ST" %}
10868 10911   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10869 10912   ins_encode( Push_Reg_F(src),
10870 10913               OpcP, RegOpc(dst) );
10871 10914   ins_pipe( fpu_reg_reg );
10872 10915 %}
10873 10916
10873 10916 // Add two single precision floating point values in xmm
// SSE scalar float arithmetic (UseSSE>=1): ADDSS/SUBSS/MULSS/DIVSS, each in
// three addressing forms — reg/reg, reg/constant-pool immediate (LdImmX),
// and reg/memory.  SSE rounds to single precision natively, so no 24-bit
// spill variants are needed here.  Opcode bytes are emitted inline: F3 0F xx.
10874 10917 instruct addX_reg(regX dst, regX src) %{
10875 10918   predicate(UseSSE>=1);
10876 10919   match(Set dst (AddF dst src));
10877 10920   format %{ "ADDSS $dst,$src" %}
10878 10921   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10879 10922   ins_pipe( pipe_slow );
10880 10923 %}
10881 10924 
10882 10925 instruct addX_imm(regX dst, immXF con) %{
10883 10926   predicate(UseSSE>=1);
10884 10927   match(Set dst (AddF dst con));
10885 10928   format %{ "ADDSS $dst,[$con]" %}
10886 10929   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
10887 10930   ins_pipe( pipe_slow );
10888 10931 %}
10889 10932 
10890 10933 instruct addX_mem(regX dst, memory mem) %{
10891 10934   predicate(UseSSE>=1);
10892 10935   match(Set dst (AddF dst (LoadF mem)));
10893 10936   format %{ "ADDSS $dst,$mem" %}
10894 10937   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
10895 10938   ins_pipe( pipe_slow );
10896 10939 %}
10897 10940 
10898 10941 // Subtract two single precision floating point values in xmm
10899 10942 instruct subX_reg(regX dst, regX src) %{
10900 10943   predicate(UseSSE>=1);
10901 10944   match(Set dst (SubF dst src));
10902 10945   format %{ "SUBSS $dst,$src" %}
10903 10946   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10904 10947   ins_pipe( pipe_slow );
10905 10948 %}
10906 10949 
10907 10950 instruct subX_imm(regX dst, immXF con) %{
10908 10951   predicate(UseSSE>=1);
10909 10952   match(Set dst (SubF dst con));
10910 10953   format %{ "SUBSS $dst,[$con]" %}
10911 10954   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
10912 10955   ins_pipe( pipe_slow );
10913 10956 %}
10914 10957 
10915 10958 instruct subX_mem(regX dst, memory mem) %{
10916 10959   predicate(UseSSE>=1);
10917 10960   match(Set dst (SubF dst (LoadF mem)));
10918 10961   format %{ "SUBSS $dst,$mem" %}
10919 10962   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10920 10963   ins_pipe( pipe_slow );
10921 10964 %}
10922 10965 
10923 10966 // Multiply two single precision floating point values in xmm
10924 10967 instruct mulX_reg(regX dst, regX src) %{
10925 10968   predicate(UseSSE>=1);
10926 10969   match(Set dst (MulF dst src));
10927 10970   format %{ "MULSS $dst,$src" %}
10928 10971   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10929 10972   ins_pipe( pipe_slow );
10930 10973 %}
10931 10974 
10932 10975 instruct mulX_imm(regX dst, immXF con) %{
10933 10976   predicate(UseSSE>=1);
10934 10977   match(Set dst (MulF dst con));
10935 10978   format %{ "MULSS $dst,[$con]" %}
10936 10979   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
10937 10980   ins_pipe( pipe_slow );
10938 10981 %}
10939 10982 
10940 10983 instruct mulX_mem(regX dst, memory mem) %{
10941 10984   predicate(UseSSE>=1);
10942 10985   match(Set dst (MulF dst (LoadF mem)));
10943 10986   format %{ "MULSS $dst,$mem" %}
10944 10987   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10945 10988   ins_pipe( pipe_slow );
10946 10989 %}
10947 10990 
10948 10991 // Divide two single precision floating point values in xmm
10949 10992 instruct divX_reg(regX dst, regX src) %{
10950 10993   predicate(UseSSE>=1);
10951 10994   match(Set dst (DivF dst src));
10952 10995   format %{ "DIVSS $dst,$src" %}
10953 10996   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10954 10997   ins_pipe( pipe_slow );
10955 10998 %}
10956 10999 
10957 11000 instruct divX_imm(regX dst, immXF con) %{
10958 11001   predicate(UseSSE>=1);
10959 11002   match(Set dst (DivF dst con));
10960 11003   format %{ "DIVSS $dst,[$con]" %}
10961 11004   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
10962 11005   ins_pipe( pipe_slow );
10963 11006 %}
10964 11007 
10965 11008 instruct divX_mem(regX dst, memory mem) %{
10966 11009   predicate(UseSSE>=1);
10967 11010   match(Set dst (DivF dst (LoadF mem)));
10968 11011   format %{ "DIVSS $dst,$mem" %}
10969 11012   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10970 11013   ins_pipe( pipe_slow );
10971 11014 %}
10973 11016
10973 11016 // Get the square root of a single precision floating point values in xmm
// Matches the ConvD2F(SqrtD(ConvF2D x)) pattern the ideal graph produces for
// a float sqrt (Java's Math.sqrt is double), collapsing it into one SQRTSS.
10974 11017 instruct sqrtX_reg(regX dst, regX src) %{
10975 11018   predicate(UseSSE>=1);
10976 11019   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10977 11020   format %{ "SQRTSS $dst,$src" %}
10978 11021   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
10979 11022   ins_pipe( pipe_slow );
10980 11023 %}
10981 11024 
10982 11025 instruct sqrtX_mem(regX dst, memory mem) %{
10983 11026   predicate(UseSSE>=1);
10984 11027   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10985 11028   format %{ "SQRTSS $dst,$mem" %}
10986 11029   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
10987 11030   ins_pipe( pipe_slow );
10988 11031 %}
10989 11032 
10990 11033 // Get the square root of a double precision floating point values in xmm
// Double sqrt needs SSE2 (F2 0F 51 = SQRTSD), hence UseSSE>=2.
10991 11034 instruct sqrtXD_reg(regXD dst, regXD src) %{
10992 11035   predicate(UseSSE>=2);
10993 11036   match(Set dst (SqrtD src));
10994 11037   format %{ "SQRTSD $dst,$src" %}
10995 11038   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
10996 11039   ins_pipe( pipe_slow );
10997 11040 %}
10998 11041 
10999 11042 instruct sqrtXD_mem(regXD dst, memory mem) %{
11000 11043   predicate(UseSSE>=2);
11001 11044   match(Set dst (SqrtD (LoadD mem)));
11002 11045   format %{ "SQRTSD $dst,$mem" %}
11003 11046   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11004 11047   ins_pipe( pipe_slow );
11005 11048 %}
11007 11050
// Absolute value / negation.  x87 forms operate in place on ST(0) (regFPR1);
// SSE forms use bit masks from the constant area: AND clears the sign bit for
// abs, XOR flips it for negate.
11008 11051 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11009 11052   predicate(UseSSE==0);
11010 11053   match(Set dst (AbsF src));
11011 11054   ins_cost(100);
11012 11055   format %{ "FABS" %}
11013 11056   opcode(0xE1, 0xD9);
11014 11057   ins_encode( OpcS, OpcP );
11015 11058   ins_pipe( fpu_reg_reg );
11016 11059 %}
11017 11060 
11018 11061 instruct absX_reg(regX dst ) %{
11019 11062   predicate(UseSSE>=1);
11020 11063   match(Set dst (AbsF dst));
11021 11064   format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11022 11065   ins_encode( AbsXF_encoding(dst));
11023 11066   ins_pipe( pipe_slow );
11024 11067 %}
11025 11068 
11026 11069 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11027 11070   predicate(UseSSE==0);
11028 11071   match(Set dst (NegF src));
11029 11072   ins_cost(100);
11030 11073   format %{ "FCHS" %}
11031 11074   opcode(0xE0, 0xD9);
11032 11075   ins_encode( OpcS, OpcP );
11033 11076   ins_pipe( fpu_reg_reg );
11034 11077 %}
11035 11078 
11036 11079 instruct negX_reg( regX dst ) %{
11037 11080   predicate(UseSSE>=1);
11038 11081   match(Set dst (NegF dst));
11039 11082   format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11040 11083   ins_encode( NegXF_encoding(dst));
11041 11084   ins_pipe( pipe_slow );
11042 11085 %}
11043 11086
11044 11087 // Cisc-alternate to addF_reg
11045 11088 // Spill to obtain 24-bit precision
// Memory-operand variants of float add: the LoadF is folded into the FPU
// instruction (tertiary opcode 0xD9 /0 loads the operand).  "24" variants
// round via Pop_Mem_F to a stack slot as elsewhere in this section.
11046 11089 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11047 11090   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11048 11091   match(Set dst (AddF src1 (LoadF src2)));
11049 11092 
11050 11093   format %{ "FLD $src2\n\t"
11051 11094             "FADD ST,$src1\n\t"
11052 11095             "FSTP_S $dst" %}
11053 11096   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11054 11097   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11055 11098               OpcReg_F(src1),
11056 11099               Pop_Mem_F(dst) );
11057 11100   ins_pipe( fpu_mem_reg_mem );
11058 11101 %}
11059 11102 //
11060 11103 // Cisc-alternate to addF_reg
11061 11104 // This instruction does not round to 24-bits
11062 11105 instruct addF_reg_mem(regF dst, memory src) %{
11063 11106   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11064 11107   match(Set dst (AddF dst (LoadF src)));
11065 11108 
11066 11109   format %{ "FADD $dst,$src" %}
11067 11110   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF D9 /0 */
11068 11111   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11069 11112               OpcP, RegOpc(dst) );
11070 11113   ins_pipe( fpu_reg_mem );
11071 11114 %}
11072 11115 
11073 11116 // // Following two instructions for _222_mpegaudio
11074 11117 // Spill to obtain 24-bit precision
11075 11118 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11076 11119   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11077 11120   match(Set dst (AddF src1 src2));
11078 11121 
11079 11122   format %{ "FADD $dst,$src1,$src2" %}
11080 11123   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11081 11124   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11082 11125               OpcReg_F(src2),
11083 11126               Pop_Mem_F(dst) );
11084 11127   ins_pipe( fpu_mem_reg_mem );
11085 11128 %}
11086 11129 
11087 11130 // Cisc-spill variant
11088 11131 // Spill to obtain 24-bit precision
// Both operands come from memory; set_instruction_start resets the encoder
// between the two memory-form FPU instructions being emitted.
11089 11132 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11090 11133   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11091 11134   match(Set dst (AddF src1 (LoadF src2)));
11092 11135 
11093 11136   format %{ "FADD $dst,$src1,$src2 cisc" %}
11094 11137   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11095 11138   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11096 11139               set_instruction_start,
11097 11140               OpcP, RMopc_Mem(secondary,src1),
11098 11141               Pop_Mem_F(dst) );
11099 11142   ins_pipe( fpu_mem_mem_mem );
11100 11143 %}
11101 11144 
11102 11145 // Spill to obtain 24-bit precision
11103 11146 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11104 11147   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11105 11148   match(Set dst (AddF src1 src2));
11106 11149 
11107 11150   format %{ "FADD $dst,$src1,$src2" %}
11108 11151   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
11109 11152   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11110 11153               set_instruction_start,
11111 11154               OpcP, RMopc_Mem(secondary,src1),
11112 11155               Pop_Mem_F(dst) );
11113 11156   ins_pipe( fpu_mem_mem_mem );
11114 11157 %}
11115 11158
11116 11159
11117 11160 // Spill to obtain 24-bit precision
// Add a float constant: Opc_MemImm_F addresses the constant in the
// constant table as a memory operand of FADD.
11118 11161 instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11119 11162   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11120 11163   match(Set dst (AddF src1 src2));
11121 11164   format %{ "FLD $src1\n\t"
11122 11165             "FADD $src2\n\t"
11123 11166             "FSTP_S $dst" %}
11124 11167   opcode(0xD8, 0x00); /* D8 /0 */
11125 11168   ins_encode( Push_Reg_F(src1),
11126 11169               Opc_MemImm_F(src2),
11127 11170               Pop_Mem_F(dst));
11128 11171   ins_pipe( fpu_mem_reg_con );
11129 11172 %}
11130 11173 //
11131 11174 // This instruction does not round to 24-bits
11132 11175 instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11133 11176   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134 11177   match(Set dst (AddF src1 src2));
11135 11178   format %{ "FLD $src1\n\t"
11136 11179             "FADD $src2\n\t"
11137 11180             "FSTP_S $dst" %}
11138 11181   opcode(0xD8, 0x00); /* D8 /0 */
11139 11182   ins_encode( Push_Reg_F(src1),
11140 11183               Opc_MemImm_F(src2),
11141 11184               Pop_Reg_F(dst));
11142 11185   ins_pipe( fpu_reg_reg_con );
11143 11186 %}
11144 11187
11145 11188 // Spill to obtain 24-bit precision
// Float multiply: same 24-bit-spill vs. register-result pairing as add/sub.
11146 11189 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11147 11190   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11148 11191   match(Set dst (MulF src1 src2));
11149 11192 
11150 11193   format %{ "FLD $src1\n\t"
11151 11194             "FMUL $src2\n\t"
11152 11195             "FSTP_S $dst" %}
11153 11196   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11154 11197   ins_encode( Push_Reg_F(src1),
11155 11198               OpcReg_F(src2),
11156 11199               Pop_Mem_F(dst) );
11157 11200   ins_pipe( fpu_mem_reg_reg );
11158 11201 %}
11159 11202 //
11160 11203 // This instruction does not round to 24-bits
11161 11204 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11162 11205   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11163 11206   match(Set dst (MulF src1 src2));
11164 11207 
11165 11208   format %{ "FLD $src1\n\t"
11166 11209             "FMUL $src2\n\t"
11167 11210             "FSTP_S $dst" %}
11168 11211   opcode(0xD8, 0x1); /* D8 C8+i */
11169 11212   ins_encode( Push_Reg_F(src2),
11170 11213               OpcReg_F(src1),
11171 11214               Pop_Reg_F(dst) );
11172 11215   ins_pipe( fpu_reg_reg_reg );
11173 11216 %}
11174 11217 
11175 11218 
11176 11219 // Spill to obtain 24-bit precision
11177 11220 // Cisc-alternate to reg-reg multiply
11178 11221 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11179 11222   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11180 11223   match(Set dst (MulF src1 (LoadF src2)));
11181 11224 
11182 11225   format %{ "FLD_S $src2\n\t"
11183 11226             "FMUL $src1\n\t"
11184 11227             "FSTP_S $dst" %}
11185 11228   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
11186 11229   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11187 11230               OpcReg_F(src1),
11188 11231               Pop_Mem_F(dst) );
11189 11232   ins_pipe( fpu_mem_reg_mem );
11190 11233 %}
11191 11234 //
11192 11235 // This instruction does not round to 24-bits
11193 11236 // Cisc-alternate to reg-reg multiply
11194 11237 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11195 11238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11196 11239   match(Set dst (MulF src1 (LoadF src2)));
11197 11240 
11198 11241   format %{ "FMUL $dst,$src1,$src2" %}
11199 11242   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
11200 11243   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11201 11244               OpcReg_F(src1),
11202 11245               Pop_Reg_F(dst) );
11203 11246   ins_pipe( fpu_reg_reg_mem );
11204 11247 %}
11205 11248
11206 11249 // Spill to obtain 24-bit precision
// Memory x memory multiply; set_instruction_start splits the two
// memory-form FPU instructions in the encoder.
11207 11250 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11208 11251   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11209 11252   match(Set dst (MulF src1 src2));
11210 11253 
11211 11254   format %{ "FMUL $dst,$src1,$src2" %}
11212 11255   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
11213 11256   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11214 11257               set_instruction_start,
11215 11258               OpcP, RMopc_Mem(secondary,src1),
11216 11259               Pop_Mem_F(dst) );
11217 11260   ins_pipe( fpu_mem_mem_mem );
11218 11261 %}
11219 11262 
11220 11263 // Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table (Opc_MemImm_F).
11221 11264 instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11222 11265   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11223 11266   match(Set dst (MulF src1 src2));
11224 11267 
11225 11268   format %{ "FMULc $dst,$src1,$src2" %}
11226 11269   opcode(0xD8, 0x1); /* D8 /1*/
11227 11270   ins_encode( Push_Reg_F(src1),
11228 11271               Opc_MemImm_F(src2),
11229 11272               Pop_Mem_F(dst));
11230 11273   ins_pipe( fpu_mem_reg_con );
11231 11274 %}
11232 11275 //
11233 11276 // This instruction does not round to 24-bits
// Multiply by a float constant, result stays in an FPU register (no 24-bit
// spill).  Format string fixed to "$dst,$src1,$src2": the original read
// "FMULc $dst. $src1, $src2" — a period instead of a comma and stray spaces,
// inconsistent with the sibling mulF24_reg_imm above.  This string is only
// the debug/PrintOptoAssembly disassembly text; encoding is unchanged.
11234 11277 instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11235 11278   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11236 11279   match(Set dst (MulF src1 src2));
11237 11280 
11238 11281   format %{ "FMULc $dst,$src1,$src2" %}
11239 11282   opcode(0xD8, 0x1); /* D8 /1*/
11240 11283   ins_encode( Push_Reg_F(src1),
11241 11284               Opc_MemImm_F(src2),
11242 11285               Pop_Reg_F(dst));
11243 11286   ins_pipe( fpu_reg_reg_con );
11244 11287 %}
11245 11288
11246 11289
11247 11290 //
11248 11291 // MACRO1 -- subsume unshared load into mulF
11249 11292 // This instruction does not round to 24-bits
// MACRO1..MACRO4: peephole-style matches over fused ideal subtrees
// (load+mul, mul+add, sub+div) so the whole expression is evaluated on the
// FPU stack without intermediate spills.  None of these round to 24 bits,
// hence the !select_24_bit_instr() predicates throughout.
11250 11293 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11251 11294   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11252 11295   match(Set dst (MulF (LoadF mem1) src));
11253 11296 
11254 11297   format %{ "FLD $mem1 ===MACRO1===\n\t"
11255 11298             "FMUL ST,$src\n\t"
11256 11299             "FSTP $dst" %}
11257 11300   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11258 11301   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11259 11302               OpcReg_F(src),
11260 11303               Pop_Reg_F(dst) );
11261 11304   ins_pipe( fpu_reg_reg_mem );
11262 11305 %}
11263 11306 //
11264 11307 // MACRO2 -- addF a mulF which subsumed an unshared load
11265 11308 // This instruction does not round to 24-bits
11266 11309 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11267 11310   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11268 11311   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11269 11312   ins_cost(95);
11270 11313 
11271 11314   format %{ "FLD $mem1 ===MACRO2===\n\t"
11272 11315             "FMUL ST,$src1 subsume mulF left load\n\t"
11273 11316             "FADD ST,$src2\n\t"
11274 11317             "FSTP $dst" %}
11275 11318   opcode(0xD9); /* LoadF D9 /0 */
11276 11319   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11277 11320               FMul_ST_reg(src1),
11278 11321               FAdd_ST_reg(src2),
11279 11322               Pop_Reg_F(dst) );
11280 11323   ins_pipe( fpu_reg_mem_reg_reg );
11281 11324 %}
11282 11325 
11283 11326 // MACRO3 -- addF a mulF
11284 11327 // This instruction does not round to 24-bits.  It is a '2-address'
11285 11328 // instruction in that the result goes back to src2.  This eliminates
11286 11329 // a move from the macro; possibly the register allocator will have
11287 11330 // to add it back (and maybe not).
11288 11331 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11289 11332   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11290 11333   match(Set src2 (AddF (MulF src0 src1) src2));
11291 11334 
11292 11335   format %{ "FLD $src0 ===MACRO3===\n\t"
11293 11336             "FMUL ST,$src1\n\t"
11294 11337             "FADDP $src2,ST" %}
11295 11338   opcode(0xD9); /* LoadF D9 /0 */
11296 11339   ins_encode( Push_Reg_F(src0),
11297 11340               FMul_ST_reg(src1),
11298 11341               FAddP_reg_ST(src2) );
11299 11342   ins_pipe( fpu_reg_reg_reg );
11300 11343 %}
11301 11344 
11302 11345 // MACRO4 -- divF subF
11303 11346 // This instruction does not round to 24-bits
11304 11347 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11305 11348   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11306 11349   match(Set dst (DivF (SubF src2 src1) src3));
11307 11350 
11308 11351   format %{ "FLD $src2 ===MACRO4===\n\t"
11309 11352             "FSUB ST,$src1\n\t"
11310 11353             "FDIV ST,$src3\n\t"
11311 11354             "FSTP $dst" %}
11312 11355   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11313 11356   ins_encode( Push_Reg_F(src2),
11314 11357               subF_divF_encode(src1,src3),
11315 11358               Pop_Reg_F(dst) );
11316 11359   ins_pipe( fpu_reg_reg_reg_reg );
11317 11360 %}
11318 11361
11319 11362 // Spill to obtain 24-bit precision
// Float divide: 24-bit-spill and register-result variants, mirroring sub/add.
11320 11363 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11321 11364   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11322 11365   match(Set dst (DivF src1 src2));
11323 11366 
11324 11367   format %{ "FDIV $dst,$src1,$src2" %}
11325 11368   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11326 11369   ins_encode( Push_Reg_F(src1),
11327 11370               OpcReg_F(src2),
11328 11371               Pop_Mem_F(dst) );
11329 11372   ins_pipe( fpu_mem_reg_reg );
11330 11373 %}
11331 11374 //
11332 11375 // This instruction does not round to 24-bits
11333 11376 instruct divF_reg(regF dst, regF src) %{
11334 11377   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11335 11378   match(Set dst (DivF dst src));
11336 11379 
11337 11380   format %{ "FDIV $dst,$src" %}
11338 11381   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11339 11382   ins_encode( Push_Reg_F(src),
11340 11383               OpcP, RegOpc(dst) );
11341 11384   ins_pipe( fpu_reg_reg );
11342 11385 %}
11343 11386
11344 11387
11345 11388 // Spill to obtain 24-bit precision
// Float remainder (ModF) via the shared emitModD() helper, which loops on
// x87 FPREM until the C2 status bit clears; it uses EAX/EFLAGS, so both
// are KILLed in every variant below.
11346 11389 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11347 11390   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11348 11391   match(Set dst (ModF src1 src2));
11349 11392   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11350 11393 
11351 11394   format %{ "FMOD $dst,$src1,$src2" %}
11352 11395   ins_encode( Push_Reg_Mod_D(src1, src2),
11353 11396               emitModD(),
11354 11397               Push_Result_Mod_D(src2),
11355 11398               Pop_Mem_F(dst));
11356 11399   ins_pipe( pipe_slow );
11357 11400 %}
11358 11401 //
11359 11402 // This instruction does not round to 24-bits
11360 11403 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11361 11404   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11362 11405   match(Set dst (ModF dst src));
11363 11406   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11364 11407 
11365 11408   format %{ "FMOD $dst,$src" %}
11366 11409   ins_encode(Push_Reg_Mod_D(dst, src),
11367 11410              emitModD(),
11368 11411              Push_Result_Mod_D(src),
11369 11412              Pop_Reg_F(dst));
11370 11413   ins_pipe( pipe_slow );
11371 11414 %}
11372 11415 
// SSE operands have no FPREM equivalent: bounce both values through the
// stack to the x87 unit, loop on FPREM, then move the result back to XMM.
11373 11416 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11374 11417   predicate(UseSSE>=1);
11375 11418   match(Set dst (ModF src0 src1));
11376 11419   effect(KILL rax, KILL cr);
11377 11420   format %{ "SUB ESP,4\t # FMOD\n"
11378 11421           "\tMOVSS [ESP+0],$src1\n"
11379 11422           "\tFLD_S [ESP+0]\n"
11380 11423           "\tMOVSS [ESP+0],$src0\n"
11381 11424           "\tFLD_S [ESP+0]\n"
11382 11425      "loop:\tFPREM\n"
11383 11426           "\tFWAIT\n"
11384 11427           "\tFNSTSW AX\n"
11385 11428           "\tSAHF\n"
11386 11429           "\tJP loop\n"
11387 11430           "\tFSTP_S [ESP+0]\n"
11388 11431           "\tMOVSS $dst,[ESP+0]\n"
11389 11432           "\tADD ESP,4\n"
11390 11433           "\tFSTP ST0\t # Restore FPU Stack"
11391 11434   %}
11392 11435   ins_cost(250);
11393 11436   ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11394 11437   ins_pipe( pipe_slow );
11395 11438 %}
11396 11439
11397 11440
11398 11441 //----------Arithmetic Conversion Instructions---------------------------------
11399 11442 // The conversions operations are all Alpha sorted.  Please keep it that way!
11400 11443 
// Round an 80-bit x87 value to single/double precision by storing it to a
// stack slot; these are also the expansion targets of convD2F/convF2D below.
11401 11444 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11402 11445   predicate(UseSSE==0);
11403 11446   match(Set dst (RoundFloat src));
11404 11447   ins_cost(125);
11405 11448   format %{ "FST_S $dst,$src\t# F-round" %}
11406 11449   ins_encode( Pop_Mem_Reg_F(dst, src) );
11407 11450   ins_pipe( fpu_mem_reg );
11408 11451 %}
11409 11452 
11410 11453 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11411 11454   predicate(UseSSE<=1);
11412 11455   match(Set dst (RoundDouble src));
11413 11456   ins_cost(125);
11414 11457   format %{ "FST_D $dst,$src\t# D-round" %}
11415 11458   ins_encode( Pop_Mem_Reg_D(dst, src) );
11416 11459   ins_pipe( fpu_mem_reg );
11417 11460 %}
11418 11461
11419 11462 // Force rounding to 24-bit precision and 6-bit exponent
// NOTE(review): IEEE single precision has an 8-bit exponent; "6-bit" here
// looks like a long-standing comment typo — confirm before relying on it.
11420 11463 instruct convD2F_reg(stackSlotF dst, regD src) %{
11421 11464   predicate(UseSSE==0);
11422 11465   match(Set dst (ConvD2F src));
11423 11466   format %{ "FST_S $dst,$src\t# F-round" %}
11424 11467   expand %{
11425 11468     roundFloat_mem_reg(dst,src);
11426 11469   %}
11427 11470 %}
11428 11471 
11429 11472 // Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: the source double lives in x87, the float result must land in
// XMM — store single to the stack from x87, reload with MOVSS.
11430 11473 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11431 11474   predicate(UseSSE==1);
11432 11475   match(Set dst (ConvD2F src));
11433 11476   effect( KILL cr );
11434 11477   format %{ "SUB ESP,4\n\t"
11435 11478             "FST_S [ESP],$src\t# F-round\n\t"
11436 11479             "MOVSS $dst,[ESP]\n\t"
11437 11480             "ADD ESP,4" %}
11438 11481   ins_encode( D2X_encoding(dst, src) );
11439 11482   ins_pipe( pipe_slow );
11440 11483 %}
11441 11484 
11442 11485 // Force rounding double precision to single precision
11443 11486 instruct convXD2X_reg(regX dst, regXD src) %{
11444 11487   predicate(UseSSE>=2);
11445 11488   match(Set dst (ConvD2F src));
11446 11489   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11447 11490   opcode(0xF2, 0x0F, 0x5A);
11448 11491   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11449 11492   ins_pipe( pipe_slow );
11450 11493 %}
11451 11494
// Float-to-double widening, one variant per UseSSE level (x87 only,
// x87->stack for UseSSE==1, and pure-SSE CVTSS2SD for UseSSE>=2).
11452 11495 instruct convF2D_reg_reg(regD dst, regF src) %{
11453 11496   predicate(UseSSE==0);
11454 11497   match(Set dst (ConvF2D src));
11455 11498   format %{ "FST_S $dst,$src\t# D-round" %}
11456 11499   ins_encode( Pop_Reg_Reg_D(dst, src));
11457 11500   ins_pipe( fpu_reg_reg );
11458 11501 %}
11459 11502 
11460 11503 instruct convF2D_reg(stackSlotD dst, regF src) %{
11461 11504   predicate(UseSSE==1);
11462 11505   match(Set dst (ConvF2D src));
11463 11506   format %{ "FST_D $dst,$src\t# D-round" %}
11464 11507   expand %{
11465 11508     roundDouble_mem_reg(dst,src);
11466 11509   %}
11467 11510 %}
11468 11511 
// UseSSE==1: float source is in XMM but the double result must be in x87 —
// spill the float to the stack, FLD_S it, and pop into the x87 dst.
11469 11512 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11470 11513   predicate(UseSSE==1);
11471 11514   match(Set dst (ConvF2D src));
11472 11515   effect( KILL cr );
11473 11516   format %{ "SUB ESP,4\n\t"
11474 11517             "MOVSS [ESP] $src\n\t"
11475 11518             "FLD_S [ESP]\n\t"
11476 11519             "ADD ESP,4\n\t"
11477 11520             "FSTP $dst\t# D-round" %}
11478 11521   ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11479 11522   ins_pipe( pipe_slow );
11480 11523 %}
11481 11524 
11482 11525 instruct convX2XD_reg(regXD dst, regX src) %{
11483 11526   predicate(UseSSE>=2);
11484 11527   match(Set dst (ConvF2D src));
11485 11528   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11486 11529   opcode(0xF3, 0x0F, 0x5A);
11487 11530   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11488 11531   ins_pipe( pipe_slow );
11489 11532 %}
11490 11533
11491 11534 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double->int (UseSSE<=1). Switches the FPU control word to truncating
// rounding (Java semantics), stores the value as an int on the stack, then
// restores the control word. The stored value is compared against
// 0x80000000 -- the "integer indefinite" pattern FIST produces on
// overflow/NaN (per Intel SDM) -- and on a match the slow-path
// d2i_wrapper stub is called to apply Java's corner-case rules.
// Result is pinned to EAX; EDX and flags are clobbered by the sequence.
11492 11535 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11493 11536 predicate(UseSSE<=1);
11494 11537 match(Set dst (ConvD2I src));
11495 11538 effect( KILL tmp, KILL cr );
11496 11539 format %{ "FLD $src\t# Convert double to int \n\t"
11497 11540 "FLDCW trunc mode\n\t"
11498 11541 "SUB ESP,4\n\t"
11499 11542 "FISTp [ESP + #0]\n\t"
11500 11543 "FLDCW std/24-bit mode\n\t"
11501 11544 "POP EAX\n\t"
11502 11545 "CMP EAX,0x80000000\n\t"
11503 11546 "JNE,s fast\n\t"
11504 11547 "FLD_D $src\n\t"
11505 11548 "CALL d2i_wrapper\n"
11506 11549 "fast:" %}
11507 11550 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11508 11551 ins_pipe( pipe_slow );
11509 11552 %}
11510 11553
11511 11554 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 double->int. CVTTSD2SI (F2 0F 2C) truncates directly; it yields
// 0x80000000 on overflow/NaN (per Intel SDM), so that sentinel is checked
// and, if hit, the double is pushed back onto the x87 stack and the
// d2i_wrapper stub applies Java's corner-case semantics.
// Result is pinned to EAX; EDX and flags are clobbered.
11512 11555 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11513 11556 predicate(UseSSE>=2);
11514 11557 match(Set dst (ConvD2I src));
11515 11558 effect( KILL tmp, KILL cr );
11516 11559 format %{ "CVTTSD2SI $dst, $src\n\t"
11517 11560 "CMP $dst,0x80000000\n\t"
11518 11561 "JNE,s fast\n\t"
11519 11562 "SUB ESP, 8\n\t"
11520 11563 "MOVSD [ESP], $src\n\t"
11521 11564 "FLD_D [ESP]\n\t"
11522 11565 "ADD ESP, 8\n\t"
11523 11566 "CALL d2i_wrapper\n"
11524 11567 "fast:" %}
11525 11568 opcode(0x1); // double-precision conversion
11526 11569 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11527 11570 ins_pipe( pipe_slow );
11528 11571 %}
11529 11572
11530 11573 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11531 11574 predicate(UseSSE<=1);
11532 11575 match(Set dst (ConvD2L src));
11533 11576 effect( KILL cr );
11534 11577 format %{ "FLD $src\t# Convert double to long\n\t"
11535 11578 "FLDCW trunc mode\n\t"
11536 11579 "SUB ESP,8\n\t"
11537 11580 "FISTp [ESP + #0]\n\t"
11538 11581 "FLDCW std/24-bit mode\n\t"
11539 11582 "POP EAX\n\t"
11540 11583 "POP EDX\n\t"
11541 11584 "CMP EDX,0x80000000\n\t"
11542 11585 "JNE,s fast\n\t"
11543 11586 "TEST EAX,EAX\n\t"
11544 11587 "JNE,s fast\n\t"
11545 11588 "FLD $src\n\t"
11546 11589 "CALL d2l_wrapper\n"
11547 11590 "fast:" %}
11548 11591 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11549 11592 ins_pipe( pipe_slow );
11550 11593 %}
11551 11594
11552 11595 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11553 11596 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11554 11597 predicate (UseSSE>=2);
11555 11598 match(Set dst (ConvD2L src));
11556 11599 effect( KILL cr );
11557 11600 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11558 11601 "MOVSD [ESP],$src\n\t"
11559 11602 "FLD_D [ESP]\n\t"
11560 11603 "FLDCW trunc mode\n\t"
11561 11604 "FISTp [ESP + #0]\n\t"
11562 11605 "FLDCW std/24-bit mode\n\t"
11563 11606 "POP EAX\n\t"
11564 11607 "POP EDX\n\t"
11565 11608 "CMP EDX,0x80000000\n\t"
11566 11609 "JNE,s fast\n\t"
11567 11610 "TEST EAX,EAX\n\t"
11568 11611 "JNE,s fast\n\t"
11569 11612 "SUB ESP,8\n\t"
11570 11613 "MOVSD [ESP],$src\n\t"
11571 11614 "FLD_D [ESP]\n\t"
11572 11615 "CALL d2l_wrapper\n"
11573 11616 "fast:" %}
11574 11617 ins_encode( XD2L_encoding(src) );
11575 11618 ins_pipe( pipe_slow );
11576 11619 %}
11577 11620
11578 11621 // Convert a float to an int. Java semantics require we do complex
11579 11622 // manglations in the corner cases. So we set the rounding mode to
11580 11623 // 'zero', store the darned float down as an int, and reset the
11581 11624 // rounding mode to 'nearest'. The hardware stores a flag value down
11582 11625 // if we would overflow or converted a NAN; we check for this
11583 11626 // and go the slow path if needed.
11584 11627 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11585 11628 predicate(UseSSE==0);
11586 11629 match(Set dst (ConvF2I src));
11587 11630 effect( KILL tmp, KILL cr );
11588 11631 format %{ "FLD $src\t# Convert float to int \n\t"
11589 11632 "FLDCW trunc mode\n\t"
11590 11633 "SUB ESP,4\n\t"
11591 11634 "FISTp [ESP + #0]\n\t"
11592 11635 "FLDCW std/24-bit mode\n\t"
11593 11636 "POP EAX\n\t"
11594 11637 "CMP EAX,0x80000000\n\t"
11595 11638 "JNE,s fast\n\t"
11596 11639 "FLD $src\n\t"
11597 11640 "CALL d2i_wrapper\n"
11598 11641 "fast:" %}
11599 11642 // D2I_encoding works for F2I
11600 11643 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11601 11644 ins_pipe( pipe_slow );
11602 11645 %}
11603 11646
11604 11647 // Convert a float in xmm to an int reg.
11605 11648 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11606 11649 predicate(UseSSE>=1);
11607 11650 match(Set dst (ConvF2I src));
11608 11651 effect( KILL tmp, KILL cr );
11609 11652 format %{ "CVTTSS2SI $dst, $src\n\t"
11610 11653 "CMP $dst,0x80000000\n\t"
11611 11654 "JNE,s fast\n\t"
11612 11655 "SUB ESP, 4\n\t"
11613 11656 "MOVSS [ESP], $src\n\t"
11614 11657 "FLD [ESP]\n\t"
11615 11658 "ADD ESP, 4\n\t"
11616 11659 "CALL d2i_wrapper\n"
11617 11660 "fast:" %}
11618 11661 opcode(0x0); // single-precision conversion
11619 11662 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11620 11663 ins_pipe( pipe_slow );
11621 11664 %}
11622 11665
11623 11666 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11624 11667 predicate(UseSSE==0);
11625 11668 match(Set dst (ConvF2L src));
11626 11669 effect( KILL cr );
11627 11670 format %{ "FLD $src\t# Convert float to long\n\t"
11628 11671 "FLDCW trunc mode\n\t"
11629 11672 "SUB ESP,8\n\t"
11630 11673 "FISTp [ESP + #0]\n\t"
11631 11674 "FLDCW std/24-bit mode\n\t"
11632 11675 "POP EAX\n\t"
11633 11676 "POP EDX\n\t"
11634 11677 "CMP EDX,0x80000000\n\t"
11635 11678 "JNE,s fast\n\t"
11636 11679 "TEST EAX,EAX\n\t"
11637 11680 "JNE,s fast\n\t"
11638 11681 "FLD $src\n\t"
11639 11682 "CALL d2l_wrapper\n"
11640 11683 "fast:" %}
11641 11684 // D2L_encoding works for F2L
11642 11685 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11643 11686 ins_pipe( pipe_slow );
11644 11687 %}
11645 11688
11646 11689 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11647 11690 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11648 11691 predicate (UseSSE>=1);
11649 11692 match(Set dst (ConvF2L src));
11650 11693 effect( KILL cr );
11651 11694 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11652 11695 "MOVSS [ESP],$src\n\t"
11653 11696 "FLD_S [ESP]\n\t"
11654 11697 "FLDCW trunc mode\n\t"
11655 11698 "FISTp [ESP + #0]\n\t"
11656 11699 "FLDCW std/24-bit mode\n\t"
11657 11700 "POP EAX\n\t"
11658 11701 "POP EDX\n\t"
11659 11702 "CMP EDX,0x80000000\n\t"
11660 11703 "JNE,s fast\n\t"
11661 11704 "TEST EAX,EAX\n\t"
11662 11705 "JNE,s fast\n\t"
11663 11706 "SUB ESP,4\t# Convert float to long\n\t"
11664 11707 "MOVSS [ESP],$src\n\t"
11665 11708 "FLD_S [ESP]\n\t"
11666 11709 "ADD ESP,4\n\t"
11667 11710 "CALL d2l_wrapper\n"
11668 11711 "fast:" %}
11669 11712 ins_encode( X2L_encoding(src) );
11670 11713 ins_pipe( pipe_slow );
11671 11714 %}
11672 11715
11673 11716 instruct convI2D_reg(regD dst, stackSlotI src) %{
11674 11717 predicate( UseSSE<=1 );
11675 11718 match(Set dst (ConvI2D src));
11676 11719 format %{ "FILD $src\n\t"
11677 11720 "FSTP $dst" %}
11678 11721 opcode(0xDB, 0x0); /* DB /0 */
11679 11722 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11680 11723 ins_pipe( fpu_reg_mem );
11681 11724 %}
11682 11725
11683 11726 instruct convI2XD_reg(regXD dst, eRegI src) %{
11684 11727 predicate( UseSSE>=2 && !UseXmmI2D );
11685 11728 match(Set dst (ConvI2D src));
11686 11729 format %{ "CVTSI2SD $dst,$src" %}
11687 11730 opcode(0xF2, 0x0F, 0x2A);
11688 11731 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11689 11732 ins_pipe( pipe_slow );
11690 11733 %}
11691 11734
11692 11735 instruct convI2XD_mem(regXD dst, memory mem) %{
11693 11736 predicate( UseSSE>=2 );
11694 11737 match(Set dst (ConvI2D (LoadI mem)));
11695 11738 format %{ "CVTSI2SD $dst,$mem" %}
11696 11739 opcode(0xF2, 0x0F, 0x2A);
11697 11740 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
11698 11741 ins_pipe( pipe_slow );
11699 11742 %}
11700 11743
11701 11744 instruct convXI2XD_reg(regXD dst, eRegI src)
11702 11745 %{
11703 11746 predicate( UseSSE>=2 && UseXmmI2D );
11704 11747 match(Set dst (ConvI2D src));
11705 11748
11706 11749 format %{ "MOVD $dst,$src\n\t"
11707 11750 "CVTDQ2PD $dst,$dst\t# i2d" %}
11708 11751 ins_encode %{
11709 11752 __ movdl($dst$$XMMRegister, $src$$Register);
11710 11753 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11711 11754 %}
11712 11755 ins_pipe(pipe_slow); // XXX
11713 11756 %}
11714 11757
11715 11758 instruct convI2D_mem(regD dst, memory mem) %{
11716 11759 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11717 11760 match(Set dst (ConvI2D (LoadI mem)));
11718 11761 format %{ "FILD $mem\n\t"
11719 11762 "FSTP $dst" %}
11720 11763 opcode(0xDB); /* DB /0 */
11721 11764 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11722 11765 Pop_Reg_D(dst));
11723 11766 ins_pipe( fpu_reg_mem );
11724 11767 %}
11725 11768
11726 11769 // Convert a byte to a float; no rounding step needed.
11727 11770 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
11728 11771 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11729 11772 match(Set dst (ConvI2F src));
11730 11773 format %{ "FILD $src\n\t"
11731 11774 "FSTP $dst" %}
11732 11775
11733 11776 opcode(0xDB, 0x0); /* DB /0 */
11734 11777 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
11735 11778 ins_pipe( fpu_reg_mem );
11736 11779 %}
11737 11780
11738 11781 // In 24-bit mode, force exponent rounding by storing back out
11739 11782 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
11740 11783 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11741 11784 match(Set dst (ConvI2F src));
11742 11785 ins_cost(200);
11743 11786 format %{ "FILD $src\n\t"
11744 11787 "FSTP_S $dst" %}
11745 11788 opcode(0xDB, 0x0); /* DB /0 */
11746 11789 ins_encode( Push_Mem_I(src),
11747 11790 Pop_Mem_F(dst));
11748 11791 ins_pipe( fpu_mem_mem );
11749 11792 %}
11750 11793
11751 11794 // In 24-bit mode, force exponent rounding by storing back out
11752 11795 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
11753 11796 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11754 11797 match(Set dst (ConvI2F (LoadI mem)));
11755 11798 ins_cost(200);
11756 11799 format %{ "FILD $mem\n\t"
11757 11800 "FSTP_S $dst" %}
11758 11801 opcode(0xDB); /* DB /0 */
11759 11802 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11760 11803 Pop_Mem_F(dst));
11761 11804 ins_pipe( fpu_mem_mem );
11762 11805 %}
11763 11806
11764 11807 // This instruction does not round to 24-bits
11765 11808 instruct convI2F_reg(regF dst, stackSlotI src) %{
11766 11809 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11767 11810 match(Set dst (ConvI2F src));
11768 11811 format %{ "FILD $src\n\t"
11769 11812 "FSTP $dst" %}
11770 11813 opcode(0xDB, 0x0); /* DB /0 */
11771 11814 ins_encode( Push_Mem_I(src),
11772 11815 Pop_Reg_F(dst));
11773 11816 ins_pipe( fpu_reg_mem );
11774 11817 %}
11775 11818
11776 11819 // This instruction does not round to 24-bits
11777 11820 instruct convI2F_mem(regF dst, memory mem) %{
11778 11821 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11779 11822 match(Set dst (ConvI2F (LoadI mem)));
11780 11823 format %{ "FILD $mem\n\t"
11781 11824 "FSTP $dst" %}
11782 11825 opcode(0xDB); /* DB /0 */
11783 11826 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11784 11827 Pop_Reg_F(dst));
11785 11828 ins_pipe( fpu_reg_mem );
11786 11829 %}
11787 11830
11788 11831 // Convert an int to a float in xmm; no rounding step needed.
11789 11832 instruct convI2X_reg(regX dst, eRegI src) %{
11790 11833 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11791 11834 match(Set dst (ConvI2F src));
11792 11835 format %{ "CVTSI2SS $dst, $src" %}
11793 11836
11794 11837 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
11795 11838 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11796 11839 ins_pipe( pipe_slow );
11797 11840 %}
11798 11841
11799 11842 instruct convXI2X_reg(regX dst, eRegI src)
11800 11843 %{
11801 11844 predicate( UseSSE>=2 && UseXmmI2F );
11802 11845 match(Set dst (ConvI2F src));
11803 11846
11804 11847 format %{ "MOVD $dst,$src\n\t"
11805 11848 "CVTDQ2PS $dst,$dst\t# i2f" %}
11806 11849 ins_encode %{
11807 11850 __ movdl($dst$$XMMRegister, $src$$Register);
11808 11851 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11809 11852 %}
11810 11853 ins_pipe(pipe_slow); // XXX
11811 11854 %}
11812 11855
11813 11856 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11814 11857 match(Set dst (ConvI2L src));
11815 11858 effect(KILL cr);
11816 11859 ins_cost(375);
11817 11860 format %{ "MOV $dst.lo,$src\n\t"
11818 11861 "MOV $dst.hi,$src\n\t"
11819 11862 "SAR $dst.hi,31" %}
11820 11863 ins_encode(convert_int_long(dst,src));
11821 11864 ins_pipe( ialu_reg_reg_long );
11822 11865 %}
11823 11866
11824 11867 // Zero-extend convert int to long
// Matches the (AndL (ConvI2L src) 0xFFFFFFFF) idiom as a zero-extension:
// copy src into the low half and XOR-clear the high half, avoiding the
// sign-extending MOV/MOV/SAR sequence of plain ConvI2L (cost 250 vs 375).
// XOR sets EFLAGS, hence KILL flags.
11825 11868 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
11826 11869 match(Set dst (AndL (ConvI2L src) mask) );
11827 11870 effect( KILL flags );
11828 11871 ins_cost(250);
11829 11872 format %{ "MOV $dst.lo,$src\n\t"
11830 11873 "XOR $dst.hi,$dst.hi" %}
11831 11874 opcode(0x33); // XOR
11832 11875 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11833 11876 ins_pipe( ialu_reg_reg_long );
11834 11877 %}
11835 11878
11836 11879 // Zero-extend long
// (AndL src 0xFFFFFFFF) on a long source: keep the low 32 bits by copying
// src.lo and XOR-clearing the high half. Same encoding scheme as
// convI2L_reg_zex above. XOR sets EFLAGS, hence KILL flags.
11837 11880 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11838 11881 match(Set dst (AndL src mask) );
11839 11882 effect( KILL flags );
11840 11883 ins_cost(250);
11841 11884 format %{ "MOV $dst.lo,$src.lo\n\t"
11842 11885 "XOR $dst.hi,$dst.hi\n\t" %}
11843 11886 opcode(0x33); // XOR
11844 11887 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11845 11888 ins_pipe( ialu_reg_reg_long );
11846 11889 %}
11847 11890
11848 11891 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11849 11892 predicate (UseSSE<=1);
11850 11893 match(Set dst (ConvL2D src));
11851 11894 effect( KILL cr );
11852 11895 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11853 11896 "PUSH $src.lo\n\t"
11854 11897 "FILD ST,[ESP + #0]\n\t"
11855 11898 "ADD ESP,8\n\t"
11856 11899 "FSTP_D $dst\t# D-round" %}
11857 11900 opcode(0xDF, 0x5); /* DF /5 */
11858 11901 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
11859 11902 ins_pipe( pipe_slow );
11860 11903 %}
11861 11904
11862 11905 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
11863 11906 predicate (UseSSE>=2);
11864 11907 match(Set dst (ConvL2D src));
11865 11908 effect( KILL cr );
11866 11909 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11867 11910 "PUSH $src.lo\n\t"
11868 11911 "FILD_D [ESP]\n\t"
11869 11912 "FSTP_D [ESP]\n\t"
11870 11913 "MOVSD $dst,[ESP]\n\t"
11871 11914 "ADD ESP,8" %}
11872 11915 opcode(0xDF, 0x5); /* DF /5 */
11873 11916 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
11874 11917 ins_pipe( pipe_slow );
11875 11918 %}
11876 11919
11877 11920 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
11878 11921 predicate (UseSSE>=1);
11879 11922 match(Set dst (ConvL2F src));
11880 11923 effect( KILL cr );
11881 11924 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11882 11925 "PUSH $src.lo\n\t"
11883 11926 "FILD_D [ESP]\n\t"
11884 11927 "FSTP_S [ESP]\n\t"
11885 11928 "MOVSS $dst,[ESP]\n\t"
11886 11929 "ADD ESP,8" %}
11887 11930 opcode(0xDF, 0x5); /* DF /5 */
11888 11931 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
11889 11932 ins_pipe( pipe_slow );
11890 11933 %}
11891 11934
11892 11935 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11893 11936 match(Set dst (ConvL2F src));
11894 11937 effect( KILL cr );
11895 11938 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11896 11939 "PUSH $src.lo\n\t"
11897 11940 "FILD ST,[ESP + #0]\n\t"
11898 11941 "ADD ESP,8\n\t"
11899 11942 "FSTP_S $dst\t# F-round" %}
11900 11943 opcode(0xDF, 0x5); /* DF /5 */
11901 11944 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
11902 11945 ins_pipe( pipe_slow );
11903 11946 %}
11904 11947
// Long->int truncation: on 32-bit x86 this is just a copy of the low
// register of the long pair; the high half is simply dropped.
11905 11948 instruct convL2I_reg( eRegI dst, eRegL src ) %{
11906 11949 match(Set dst (ConvL2I src));
11907 11950 effect( DEF dst, USE src );
11908 11951 format %{ "MOV $dst,$src.lo" %}
11909 11952 ins_encode(enc_CopyL_Lo(dst,src));
11910 11953 ins_pipe( ialu_reg_reg );
11911 11954 %}
11912 11955
11913 11956
11914 11957 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11915 11958 match(Set dst (MoveF2I src));
11916 11959 effect( DEF dst, USE src );
11917 11960 ins_cost(100);
11918 11961 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
11919 11962 opcode(0x8B);
11920 11963 ins_encode( OpcP, RegMem(dst,src));
11921 11964 ins_pipe( ialu_reg_mem );
11922 11965 %}
11923 11966
11924 11967 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11925 11968 predicate(UseSSE==0);
11926 11969 match(Set dst (MoveF2I src));
11927 11970 effect( DEF dst, USE src );
11928 11971
11929 11972 ins_cost(125);
11930 11973 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
11931 11974 ins_encode( Pop_Mem_Reg_F(dst, src) );
11932 11975 ins_pipe( fpu_mem_reg );
11933 11976 %}
11934 11977
11935 11978 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
11936 11979 predicate(UseSSE>=1);
11937 11980 match(Set dst (MoveF2I src));
11938 11981 effect( DEF dst, USE src );
11939 11982
11940 11983 ins_cost(95);
11941 11984 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
11942 11985 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
11943 11986 ins_pipe( pipe_slow );
11944 11987 %}
11945 11988
11946 11989 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
11947 11990 predicate(UseSSE>=2);
11948 11991 match(Set dst (MoveF2I src));
11949 11992 effect( DEF dst, USE src );
11950 11993 ins_cost(85);
11951 11994 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
11952 11995 ins_encode( MovX2I_reg(dst, src));
11953 11996 ins_pipe( pipe_slow );
11954 11997 %}
11955 11998
11956 11999 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
11957 12000 match(Set dst (MoveI2F src));
11958 12001 effect( DEF dst, USE src );
11959 12002
11960 12003 ins_cost(100);
11961 12004 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
11962 12005 opcode(0x89);
11963 12006 ins_encode( OpcPRegSS( dst, src ) );
11964 12007 ins_pipe( ialu_mem_reg );
11965 12008 %}
11966 12009
11967 12010
11968 12011 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11969 12012 predicate(UseSSE==0);
11970 12013 match(Set dst (MoveI2F src));
11971 12014 effect(DEF dst, USE src);
11972 12015
11973 12016 ins_cost(125);
11974 12017 format %{ "FLD_S $src\n\t"
11975 12018 "FSTP $dst\t# MoveI2F_stack_reg" %}
11976 12019 opcode(0xD9); /* D9 /0, FLD m32real */
11977 12020 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11978 12021 Pop_Reg_F(dst) );
11979 12022 ins_pipe( fpu_reg_mem );
11980 12023 %}
11981 12024
11982 12025 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
11983 12026 predicate(UseSSE>=1);
11984 12027 match(Set dst (MoveI2F src));
11985 12028 effect( DEF dst, USE src );
11986 12029
11987 12030 ins_cost(95);
11988 12031 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
11989 12032 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
11990 12033 ins_pipe( pipe_slow );
11991 12034 %}
11992 12035
11993 12036 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
11994 12037 predicate(UseSSE>=2);
11995 12038 match(Set dst (MoveI2F src));
11996 12039 effect( DEF dst, USE src );
11997 12040
11998 12041 ins_cost(85);
11999 12042 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12000 12043 ins_encode( MovI2X_reg(dst, src) );
12001 12044 ins_pipe( pipe_slow );
12002 12045 %}
12003 12046
12004 12047 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12005 12048 match(Set dst (MoveD2L src));
12006 12049 effect(DEF dst, USE src);
12007 12050
12008 12051 ins_cost(250);
12009 12052 format %{ "MOV $dst.lo,$src\n\t"
12010 12053 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12011 12054 opcode(0x8B, 0x8B);
12012 12055 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12013 12056 ins_pipe( ialu_mem_long_reg );
12014 12057 %}
12015 12058
12016 12059 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12017 12060 predicate(UseSSE<=1);
12018 12061 match(Set dst (MoveD2L src));
12019 12062 effect(DEF dst, USE src);
12020 12063
12021 12064 ins_cost(125);
12022 12065 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12023 12066 ins_encode( Pop_Mem_Reg_D(dst, src) );
12024 12067 ins_pipe( fpu_mem_reg );
12025 12068 %}
12026 12069
12027 12070 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12028 12071 predicate(UseSSE>=2);
12029 12072 match(Set dst (MoveD2L src));
12030 12073 effect(DEF dst, USE src);
12031 12074 ins_cost(95);
12032 12075
12033 12076 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12034 12077 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12035 12078 ins_pipe( pipe_slow );
12036 12079 %}
12037 12080
// Bit-copy an XMM double into a long register pair (no conversion):
// MOVD extracts the low 32 bits, PSHUFLW with 0x4E swaps the 16-bit lanes
// so the upper dword moves down into $tmp, and a second MOVD extracts it
// into $dst.hi. Needs a scratch XMM register, hence TEMP tmp.
12038 12081 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12039 12082 predicate(UseSSE>=2);
12040 12083 match(Set dst (MoveD2L src));
12041 12084 effect(DEF dst, USE src, TEMP tmp);
12042 12085 ins_cost(85);
12043 12086 format %{ "MOVD $dst.lo,$src\n\t"
12044 12087 "PSHUFLW $tmp,$src,0x4E\n\t"
12045 12088 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12046 12089 ins_encode( MovXD2L_reg(dst, src, tmp) );
12047 12090 ins_pipe( pipe_slow );
12048 12091 %}
12049 12092
12050 12093 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12051 12094 match(Set dst (MoveL2D src));
12052 12095 effect(DEF dst, USE src);
12053 12096
12054 12097 ins_cost(200);
12055 12098 format %{ "MOV $dst,$src.lo\n\t"
12056 12099 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12057 12100 opcode(0x89, 0x89);
12058 12101 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12059 12102 ins_pipe( ialu_mem_long_reg );
12060 12103 %}
12061 12104
12062 12105
12063 12106 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12064 12107 predicate(UseSSE<=1);
12065 12108 match(Set dst (MoveL2D src));
12066 12109 effect(DEF dst, USE src);
12067 12110 ins_cost(125);
12068 12111
12069 12112 format %{ "FLD_D $src\n\t"
12070 12113 "FSTP $dst\t# MoveL2D_stack_reg" %}
12071 12114 opcode(0xDD); /* DD /0, FLD m64real */
12072 12115 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12073 12116 Pop_Reg_D(dst) );
12074 12117 ins_pipe( fpu_reg_mem );
12075 12118 %}
12076 12119
12077 12120
12078 12121 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12079 12122 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12080 12123 match(Set dst (MoveL2D src));
12081 12124 effect(DEF dst, USE src);
12082 12125
12083 12126 ins_cost(95);
12084 12127 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12085 12128 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12086 12129 ins_pipe( pipe_slow );
12087 12130 %}
12088 12131
12089 12132 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12090 12133 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12091 12134 match(Set dst (MoveL2D src));
12092 12135 effect(DEF dst, USE src);
12093 12136
12094 12137 ins_cost(95);
12095 12138 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12096 12139 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12097 12140 ins_pipe( pipe_slow );
12098 12141 %}
12099 12142
12100 12143 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12101 12144 predicate(UseSSE>=2);
12102 12145 match(Set dst (MoveL2D src));
12103 12146 effect(TEMP dst, USE src, TEMP tmp);
12104 12147 ins_cost(85);
12105 12148 format %{ "MOVD $dst,$src.lo\n\t"
12106 12149 "MOVD $tmp,$src.hi\n\t"
12107 12150 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12108 12151 ins_encode( MovL2XD_reg(dst, src, tmp) );
12109 12152 ins_pipe( pipe_slow );
12110 12153 %}
12111 12154
12112 12155 // Replicate scalar to packed byte (1 byte) values in xmm
12113 12156 instruct Repl8B_reg(regXD dst, regXD src) %{
12114 12157 predicate(UseSSE>=2);
12115 12158 match(Set dst (Replicate8B src));
12116 12159 format %{ "MOVDQA $dst,$src\n\t"
12117 12160 "PUNPCKLBW $dst,$dst\n\t"
12118 12161 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12119 12162 ins_encode( pshufd_8x8(dst, src));
12120 12163 ins_pipe( pipe_slow );
12121 12164 %}
12122 12165
12123 12166 // Replicate scalar to packed byte (1 byte) values in xmm
12124 12167 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12125 12168 predicate(UseSSE>=2);
12126 12169 match(Set dst (Replicate8B src));
12127 12170 format %{ "MOVD $dst,$src\n\t"
12128 12171 "PUNPCKLBW $dst,$dst\n\t"
12129 12172 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12130 12173 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12131 12174 ins_pipe( pipe_slow );
12132 12175 %}
12133 12176
12134 12177 // Replicate scalar zero to packed byte (1 byte) values in xmm
12135 12178 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12136 12179 predicate(UseSSE>=2);
12137 12180 match(Set dst (Replicate8B zero));
12138 12181 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12139 12182 ins_encode( pxor(dst, dst));
12140 12183 ins_pipe( fpu_reg_reg );
12141 12184 %}
12142 12185
12143 12186 // Replicate scalar to packed short (2 byte) values in xmm
12144 12187 instruct Repl4S_reg(regXD dst, regXD src) %{
12145 12188 predicate(UseSSE>=2);
12146 12189 match(Set dst (Replicate4S src));
12147 12190 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12148 12191 ins_encode( pshufd_4x16(dst, src));
12149 12192 ins_pipe( fpu_reg_reg );
12150 12193 %}
12151 12194
12152 12195 // Replicate scalar to packed short (2 byte) values in xmm
12153 12196 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12154 12197 predicate(UseSSE>=2);
12155 12198 match(Set dst (Replicate4S src));
12156 12199 format %{ "MOVD $dst,$src\n\t"
12157 12200 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12158 12201 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12159 12202 ins_pipe( fpu_reg_reg );
12160 12203 %}
12161 12204
12162 12205 // Replicate scalar zero to packed short (2 byte) values in xmm
12163 12206 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12164 12207 predicate(UseSSE>=2);
12165 12208 match(Set dst (Replicate4S zero));
12166 12209 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12167 12210 ins_encode( pxor(dst, dst));
12168 12211 ins_pipe( fpu_reg_reg );
12169 12212 %}
12170 12213
12171 12214 // Replicate scalar to packed char (2 byte) values in xmm
12172 12215 instruct Repl4C_reg(regXD dst, regXD src) %{
12173 12216 predicate(UseSSE>=2);
12174 12217 match(Set dst (Replicate4C src));
12175 12218 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12176 12219 ins_encode( pshufd_4x16(dst, src));
12177 12220 ins_pipe( fpu_reg_reg );
12178 12221 %}
12179 12222
12180 12223 // Replicate scalar to packed char (2 byte) values in xmm
12181 12224 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12182 12225 predicate(UseSSE>=2);
12183 12226 match(Set dst (Replicate4C src));
12184 12227 format %{ "MOVD $dst,$src\n\t"
12185 12228 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12186 12229 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12187 12230 ins_pipe( fpu_reg_reg );
12188 12231 %}
12189 12232
12190 12233 // Replicate scalar zero to packed char (2 byte) values in xmm
12191 12234 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12192 12235 predicate(UseSSE>=2);
12193 12236 match(Set dst (Replicate4C zero));
12194 12237 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12195 12238 ins_encode( pxor(dst, dst));
12196 12239 ins_pipe( fpu_reg_reg );
12197 12240 %}
12198 12241
12199 12242 // Replicate scalar to packed integer (4 byte) values in xmm
12200 12243 instruct Repl2I_reg(regXD dst, regXD src) %{
12201 12244 predicate(UseSSE>=2);
12202 12245 match(Set dst (Replicate2I src));
12203 12246 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12204 12247 ins_encode( pshufd(dst, src, 0x00));
12205 12248 ins_pipe( fpu_reg_reg );
12206 12249 %}
12207 12250
12208 12251 // Replicate scalar to packed integer (4 byte) values in xmm
12209 12252 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12210 12253 predicate(UseSSE>=2);
12211 12254 match(Set dst (Replicate2I src));
12212 12255 format %{ "MOVD $dst,$src\n\t"
12213 12256 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12214 12257 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12215 12258 ins_pipe( fpu_reg_reg );
12216 12259 %}
12217 12260
12218 12261 // Replicate scalar zero to packed integer (4 byte) values in xmm
12219 12262 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12220 12263 predicate(UseSSE>=2);
12221 12264 match(Set dst (Replicate2I zero));
12222 12265 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12223 12266 ins_encode( pxor(dst, dst));
12224 12267 ins_pipe( fpu_reg_reg );
12225 12268 %}
12226 12269
12227 12270 // Replicate scalar to packed single precision floating point values in xmm
12228 12271 instruct Repl2F_reg(regXD dst, regXD src) %{
12229 12272 predicate(UseSSE>=2);
12230 12273 match(Set dst (Replicate2F src));
12231 12274 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12232 12275 ins_encode( pshufd(dst, src, 0xe0));
12233 12276 ins_pipe( fpu_reg_reg );
12234 12277 %}
12235 12278
12236 12279 // Replicate scalar to packed single precision floating point values in xmm
12237 12280 instruct Repl2F_regX(regXD dst, regX src) %{
12238 12281 predicate(UseSSE>=2);
12239 12282 match(Set dst (Replicate2F src));
12240 12283 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12241 12284 ins_encode( pshufd(dst, src, 0xe0));
12242 12285 ins_pipe( fpu_reg_reg );
12243 12286 %}
12244 12287
12245 12288 // Replicate scalar to packed single precision floating point values in xmm
12246 12289 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12247 12290 predicate(UseSSE>=2);
12248 12291 match(Set dst (Replicate2F zero));
12249 12292 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12250 12293 ins_encode( pxor(dst, dst));
12251 12294 ins_pipe( fpu_reg_reg );
12252 12295 %}
12253 12296
12254 12297 // =======================================================================
12255 12298 // fast clearing of an array
// Fast array clear via REP STOS: EAX is zeroed (XOR 33 /r), then F3 AB
// stores EAX into [EDI] ECX times, advancing EDI. The count is first
// doubled (SHL ECX,1, encoded D1 /4) -- each STOS iteration stores only
// 4 bytes, so the incoming count is presumably in 8-byte units; the
// format's "doublewords to words" wording uses HotSpot's word naming.
// TODO confirm the unit of cnt against ClearArray's contract.
// All of cnt (ECX), base (EDI), zero (EAX) and flags are destroyed.
12256 12299 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12257 12300 match(Set dummy (ClearArray cnt base));
12258 12301 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12259 12302 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12260 12303 "XOR EAX,EAX\n\t"
12261 12304 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12262 12305 opcode(0,0x4);
12263 12306 ins_encode( Opcode(0xD1), RegOpc(ECX),
12264 12307 OpcRegReg(0x33,EAX,EAX),
12265 12308 Opcode(0xF3), Opcode(0xAB) );
12266 12309 ins_pipe( pipe_slow );
12267 12310 %}
12268 12311
// StrComp intrinsic: delegates entirely to
// MacroAssembler::string_compare, which compares (str1,cnt1) against
// (str2,cnt2) and leaves the result in EAX. The operand constraints pin
// the fixed registers that routine expects; both string/count pairs are
// consumed (USE_KILL) and two XMM temps plus flags are clobbered.
12269 12312 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12270 12313 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12271 12314 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12272 12315 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12273 12316
12274 12317 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12275 12318 ins_encode %{
12276 12319 __ string_compare($str1$$Register, $str2$$Register,
12277 12320 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12278 12321 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12279 12322 %}
12280 12323 ins_pipe( pipe_slow );
12281 12324 %}
12282 12325
12283 12326 // fast string equals
// Delegates to MacroAssembler::char_arrays_equals; the leading 'false'
// argument selects the string variant (array_equals below passes 'true').
// tmp1/tmp2 are XMM scratch, tmp3 is a GP scratch; inputs are destroyed.
12284 12327 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12285 12328 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12286 12329 match(Set result (StrEquals (Binary str1 str2) cnt));
12287 12330 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12288 12331
12289 12332 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12290 12333 ins_encode %{
12291 12334 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12292 12335 $cnt$$Register, $result$$Register, $tmp3$$Register,
12293 12336 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12294 12337 %}
12295 12338 ins_pipe( pipe_slow );
12296 12339 %}
12297 12340
// Intrinsic String.indexOf: only selected when the CPU supports SSE4.2
// (the helper presumably uses PCMPESTRI-style string instructions — the
// predicate gates on UseSSE42Intrinsics). Inputs are destroyed.
12298 12341 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12299 12342 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12300 12343 predicate(UseSSE42Intrinsics);
12301 12344 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12302 12345 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12303 12346
12304 12347 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12305 12348 ins_encode %{
12306 12349 __ string_indexof($str1$$Register, $str2$$Register,
12307 12350 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12308 12351 $tmp1$$XMMRegister, $tmp2$$Register);
12309 12352 %}
12310 12353 ins_pipe( pipe_slow );
12311 12354 %}
12312 12355
12313 12356 // fast array equals
// Same helper as string_equals, but the leading 'true' selects the
// char-array variant. Note the length operand is tmp3 (loaded by the
// helper from the arrays), not an ideal-graph input.
12314 12357 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12315 12358 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12316 12359 %{
12317 12360 match(Set result (AryEq ary1 ary2));
12318 12361 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12319 12362 //ins_cost(300);
12320 12363
12321 12364 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12322 12365 ins_encode %{
12323 12366 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12324 12367 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12325 12368 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12326 12369 %}
12327 12370 ins_pipe( pipe_slow );
12328 12371 %}
12329 12372
12330 12373 //----------Control Flow Instructions------------------------------------------
12331 12374 // Signed compare Instructions
// CMP reg,reg — sets eFlagsReg for a signed integer compare (3B /r).
12332 12375 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12333 12376 match(Set cr (CmpI op1 op2));
12334 12377 effect( DEF cr, USE op1, USE op2 );
12335 12378 format %{ "CMP $op1,$op2" %}
12336 12379 opcode(0x3B); /* Opcode 3B /r */
12337 12380 ins_encode( OpcP, RegReg( op1, op2) );
12338 12381 ins_pipe( ialu_cr_reg_reg );
12339 12382 %}
12340 12383
// CMP reg,imm — OpcSErm/Con8or32 choose the sign-extended 8-bit immediate
// form when the constant fits, otherwise the full 32-bit form (81 /7).
12341 12384 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12342 12385 match(Set cr (CmpI op1 op2));
12343 12386 effect( DEF cr, USE op1 );
12344 12387 format %{ "CMP $op1,$op2" %}
12345 12388 opcode(0x81,0x07); /* Opcode 81 /7 */
12346 12389 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12347 12390 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12348 12391 ins_pipe( ialu_cr_reg_imm );
12349 12392 %}
12350 12393
12351 12394 // Cisc-spilled version of cmpI_eReg
// CMP reg,mem — folds the load into the compare; higher cost reflects
// the memory operand.
12352 12395 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12353 12396 match(Set cr (CmpI op1 (LoadI op2)));
12354 12397
12355 12398 format %{ "CMP $op1,$op2" %}
12356 12399 ins_cost(500);
12357 12400 opcode(0x3B); /* Opcode 3B /r */
12358 12401 ins_encode( OpcP, RegMem( op1, op2) );
12359 12402 ins_pipe( ialu_cr_reg_mem );
12360 12403 %}
12361 12404
// Compare against zero: TEST src,src (85 /r) is shorter than CMP src,0
// and sets the same Z/S flags.
12362 12405 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12363 12406 match(Set cr (CmpI src zero));
12364 12407 effect( DEF cr, USE src );
12365 12408
12366 12409 format %{ "TEST $src,$src" %}
12367 12410 opcode(0x85);
12368 12411 ins_encode( OpcP, RegReg( src, src ) );
12369 12412 ins_pipe( ialu_cr_reg_imm );
12370 12413 %}
12371 12414
// (AndI src con) == 0 folds to TEST src,imm32 (F7 /0) — no AND result
// register is needed, only the flags.
12372 12415 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12373 12416 match(Set cr (CmpI (AndI src con) zero));
12374 12417
12375 12418 format %{ "TEST $src,$con" %}
12376 12419 opcode(0xF7,0x00);
12377 12420 ins_encode( OpcP, RegOpc(src), Con32(con) );
12378 12421 ins_pipe( ialu_cr_reg_imm );
12379 12422 %}
12380 12423
// (AndI src mem) == 0 folds to TEST reg,mem.
12381 12424 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12382 12425 match(Set cr (CmpI (AndI src mem) zero));
12383 12426
12384 12427 format %{ "TEST $src,$mem" %}
12385 12428 opcode(0x85);
12386 12429 ins_encode( OpcP, RegMem( src, mem ) );
12387 12430 ins_pipe( ialu_cr_reg_mem );
12388 12431 %}
12389 12432
12390 12433 // Unsigned compare Instructions; really, same as signed except they
12391 12434 // produce an eFlagsRegU instead of eFlagsReg.
// The encodings are identical to the signed forms above (CMP sets both
// signed and unsigned condition flags); only the flags-register class
// differs so the matcher pairs them with unsigned branches.
12392 12435 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12393 12436 match(Set cr (CmpU op1 op2));
12394 12437
12395 12438 format %{ "CMPu $op1,$op2" %}
12396 12439 opcode(0x3B); /* Opcode 3B /r */
12397 12440 ins_encode( OpcP, RegReg( op1, op2) );
12398 12441 ins_pipe( ialu_cr_reg_reg );
12399 12442 %}
12400 12443
12401 12444 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12402 12445 match(Set cr (CmpU op1 op2));
12403 12446
12404 12447 format %{ "CMPu $op1,$op2" %}
12405 12448 opcode(0x81,0x07); /* Opcode 81 /7 */
12406 12449 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12407 12450 ins_pipe( ialu_cr_reg_imm );
12408 12451 %}
12409 12452
12410 12453 // // Cisc-spilled version of cmpU_eReg
12411 12454 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12412 12455 match(Set cr (CmpU op1 (LoadI op2)));
12413 12456
12414 12457 format %{ "CMPu $op1,$op2" %}
12415 12458 ins_cost(500);
12416 12459 opcode(0x3B); /* Opcode 3B /r */
12417 12460 ins_encode( OpcP, RegMem( op1, op2) );
12418 12461 ins_pipe( ialu_cr_reg_mem );
12419 12462 %}
12420 12463
12421 12464 // // Cisc-spilled version of cmpU_eReg
12422 12465 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12423 12466 // match(Set cr (CmpU (LoadI op1) op2));
12424 12467 //
12425 12468 // format %{ "CMPu $op1,$op2" %}
12426 12469 // ins_cost(500);
12427 12470 // opcode(0x39); /* Opcode 39 /r */
12428 12471 // ins_encode( OpcP, RegMem( op1, op2) );
12429 12472 //%}
12430 12473
// Unsigned compare to zero — TEST src,src, same as the signed testI_reg.
12431 12474 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12432 12475 match(Set cr (CmpU src zero));
12433 12476
12434 12477 format %{ "TESTu $src,$src" %}
12435 12478 opcode(0x85);
12436 12479 ins_encode( OpcP, RegReg( src, src ) );
12437 12480 ins_pipe( ialu_cr_reg_imm );
12438 12481 %}
12439 12482
12440 12483 // Unsigned pointer compare Instructions
// Pointer compares are unsigned (addresses have no sign); encodings are
// the same CMP forms used for integers above.
12441 12484 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12442 12485 match(Set cr (CmpP op1 op2));
12443 12486
12444 12487 format %{ "CMPu $op1,$op2" %}
12445 12488 opcode(0x3B); /* Opcode 3B /r */
12446 12489 ins_encode( OpcP, RegReg( op1, op2) );
12447 12490 ins_pipe( ialu_cr_reg_reg );
12448 12491 %}
12449 12492
12450 12493 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12451 12494 match(Set cr (CmpP op1 op2));
12452 12495
12453 12496 format %{ "CMPu $op1,$op2" %}
12454 12497 opcode(0x81,0x07); /* Opcode 81 /7 */
12455 12498 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12456 12499 ins_pipe( ialu_cr_reg_imm );
12457 12500 %}
12458 12501
12459 12502 // // Cisc-spilled version of cmpP_eReg
12460 12503 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12461 12504 match(Set cr (CmpP op1 (LoadP op2)));
12462 12505
12463 12506 format %{ "CMPu $op1,$op2" %}
12464 12507 ins_cost(500);
12465 12508 opcode(0x3B); /* Opcode 3B /r */
12466 12509 ins_encode( OpcP, RegMem( op1, op2) );
12467 12510 ins_pipe( ialu_cr_reg_mem );
12468 12511 %}
12469 12512
12470 12513 // // Cisc-spilled version of cmpP_eReg
12471 12514 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12472 12515 // match(Set cr (CmpP (LoadP op1) op2));
12473 12516 //
12474 12517 // format %{ "CMPu $op1,$op2" %}
12475 12518 // ins_cost(500);
12476 12519 // opcode(0x39); /* Opcode 39 /r */
12477 12520 // ins_encode( OpcP, RegMem( op1, op2) );
12478 12521 //%}
12479 12522
12480 12523 // Compare raw pointer (used in out-of-heap check).
12481 12524 // Only works because non-oop pointers must be raw pointers
12482 12525 // and raw pointers have no anti-dependencies.
// The predicate inspects the loaded value's type on the ideal node (n) to
// restrict this match to non-oop (raw) pointers; cheaper than
// compP_eReg_mem since no anti-dependence bookkeeping is needed.
12483 12526 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12484 12527 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12485 12528 match(Set cr (CmpP op1 (LoadP op2)));
12486 12529
12487 12530 format %{ "CMPu $op1,$op2" %}
12488 12531 opcode(0x3B); /* Opcode 3B /r */
12489 12532 ins_encode( OpcP, RegMem( op1, op2) );
12490 12533 ins_pipe( ialu_cr_reg_mem );
12491 12534 %}
12492 12535
12493 12536 //
12494 12537 // This will generate a signed flags result. This should be ok
12495 12538 // since any compare to a zero should be eq/neq.
12496 12539 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12497 12540 match(Set cr (CmpP src zero));
12498 12541
12499 12542 format %{ "TEST $src,$src" %}
12500 12543 opcode(0x85);
12501 12544 ins_encode( OpcP, RegReg( src, src ) );
12502 12545 ins_pipe( ialu_cr_reg_imm );
12503 12546 %}
12504 12547
12505 12548 // Cisc-spilled version of testP_reg
12506 12549 // This will generate a signed flags result. This should be ok
12507 12550 // since any compare to a zero should be eq/neq.
// TEST mem,0xFFFFFFFF: AND against all-ones sets Z iff the loaded pointer
// is null, without needing a register for the pointer.
12508 12551 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12509 12552 match(Set cr (CmpP (LoadP op) zero));
12510 12553
12511 12554 format %{ "TEST $op,0xFFFFFFFF" %}
12512 12555 ins_cost(500);
12513 12556 opcode(0xF7); /* Opcode F7 /0 */
12514 12557 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12515 12558 ins_pipe( ialu_cr_reg_imm );
12516 12559 %}
12517 12560
12518 12561 // Yanked all unsigned pointer compare operations.
12519 12562 // Pointer compares are done with CmpP which is already unsigned.
12520 12563
12521 12564 //----------Max and Min--------------------------------------------------------
12522 12565 // Min Instructions
12523 12566 ////
12524 12567 // *** Min and Max using the conditional move are slower than the
12525 12568 // *** branch version on a Pentium III.
12526 12569 // // Conditional move for min
12527 12570 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12528 12571 // effect( USE_DEF op2, USE op1, USE cr );
12529 12572 // format %{ "CMOVlt $op2,$op1\t! min" %}
12530 12573 // opcode(0x4C,0x0F);
12531 12574 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12532 12575 // ins_pipe( pipe_cmov_reg );
12533 12576 //%}
12534 12577 //
12535 12578 //// Min Register with Register (P6 version)
12536 12579 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12537 12580 // predicate(VM_Version::supports_cmov() );
12538 12581 // match(Set op2 (MinI op1 op2));
12539 12582 // ins_cost(200);
12540 12583 // expand %{
12541 12584 // eFlagsReg cr;
12542 12585 // compI_eReg(cr,op1,op2);
12543 12586 // cmovI_reg_lt(op2,op1,cr);
12544 12587 // %}
12545 12588 //%}
12546 12589
12547 12590 // Min Register with Register (generic version)
// Branch-based MIN: the actual code comes from the min_enc encoding class;
// flags are clobbered. NOTE(review): opcode(0xCC) looks like a placeholder
// (min_enc emits the real bytes) — confirm against the encode block.
12548 12591 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12549 12592 match(Set dst (MinI dst src));
12550 12593 effect(KILL flags);
12551 12594 ins_cost(300);
12552 12595
12553 12596 format %{ "MIN $dst,$src" %}
12554 12597 opcode(0xCC);
12555 12598 ins_encode( min_enc(dst,src) );
12556 12599 ins_pipe( pipe_slow );
12557 12600 %}
12558 12601
12559 12602 // Max Register with Register
12560 12603 // *** Min and Max using the conditional move are slower than the
12561 12604 // *** branch version on a Pentium III.
12562 12605 // // Conditional move for max
12563 12606 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12564 12607 // effect( USE_DEF op2, USE op1, USE cr );
12565 12608 // format %{ "CMOVgt $op2,$op1\t! max" %}
12566 12609 // opcode(0x4F,0x0F);
12567 12610 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12568 12611 // ins_pipe( pipe_cmov_reg );
12569 12612 //%}
12570 12613 //
12571 12614 // // Max Register with Register (P6 version)
12572 12615 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12573 12616 // predicate(VM_Version::supports_cmov() );
12574 12617 // match(Set op2 (MaxI op1 op2));
12575 12618 // ins_cost(200);
12576 12619 // expand %{
12577 12620 // eFlagsReg cr;
12578 12621 // compI_eReg(cr,op1,op2);
12579 12622 // cmovI_reg_gt(op2,op1,cr);
12580 12623 // %}
12581 12624 //%}
12582 12625
12583 12626 // Max Register with Register (generic version)
// Branch-based MAX, mirror of minI_eReg via the max_enc encoding class.
12584 12627 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12585 12628 match(Set dst (MaxI dst src));
12586 12629 effect(KILL flags);
12587 12630 ins_cost(300);
12588 12631
12589 12632 format %{ "MAX $dst,$src" %}
12590 12633 opcode(0xCC);
12591 12634 ins_encode( max_enc(dst,src) );
12592 12635 ins_pipe( pipe_slow );
12593 12636 %}
12594 12637
12595 12638 // ============================================================================
12596 12639 // Branch Instructions
12597 12640 // Jump Table
// Computed jump for tableswitch: materializes a label table in the
// constant area (address_table_constant over _index2label) and jumps
// through [table_base + switch_val], scaled times_1 — so switch_val is
// expected to be a pre-scaled byte offset into the table.
12598 12641 instruct jumpXtnd(eRegI switch_val) %{
12599 12642 match(Jump switch_val);
12600 12643 ins_cost(350);
12601 12644
12602 12645 format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12603 12646
12604 12647 ins_encode %{
12605 12648 address table_base = __ address_table_constant(_index2label);
12606 12649
12607 12650 // Jump to Address(table_base + switch_reg)
12608 12651 InternalAddress table(table_base);
12609 12652 Address index(noreg, $switch_val$$Register, Address::times_1);
12610 12653 __ jump(ArrayAddress(table, index));
12611 12654 %}
12612 12655 ins_pc_relative(1);
12613 12656 ins_pipe(pipe_jmp);
12614 12657 %}
12615 12658
12616 12659 // Jump Direct - Label defines a relative address from JMP+1
// E9 rel32: 5-byte unconditional jump. The long-offset default; the
// short (2-byte) variants further down replace these when the target is
// within rel8 range (see ins_short_branch machinery).
12617 12660 instruct jmpDir(label labl) %{
12618 12661 match(Goto);
12619 12662 effect(USE labl);
12620 12663
12621 12664 ins_cost(300);
12622 12665 format %{ "JMP $labl" %}
12623 12666 size(5);
12624 12667 opcode(0xE9);
12625 12668 ins_encode( OpcP, Lbl( labl ) );
12626 12669 ins_pipe( pipe_jmp );
12627 12670 ins_pc_relative(1);
12628 12671 %}
12629 12672
12630 12673 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// 0F 8x rel32: 6-byte conditional jump; condition code comes from cop.
12631 12674 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12632 12675 match(If cop cr);
12633 12676 effect(USE labl);
12634 12677
12635 12678 ins_cost(300);
12636 12679 format %{ "J$cop $labl" %}
12637 12680 size(6);
12638 12681 opcode(0x0F, 0x80);
12639 12682 ins_encode( Jcc( cop, labl) );
12640 12683 ins_pipe( pipe_jcc );
12641 12684 ins_pc_relative(1);
12642 12685 %}
12643 12686
12644 12687 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12645 12688 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12646 12689 match(CountedLoopEnd cop cr);
12647 12690 effect(USE labl);
12648 12691
12649 12692 ins_cost(300);
12650 12693 format %{ "J$cop $labl\t# Loop end" %}
12651 12694 size(6);
12652 12695 opcode(0x0F, 0x80);
12653 12696 ins_encode( Jcc( cop, labl) );
12654 12697 ins_pipe( pipe_jcc );
12655 12698 ins_pc_relative(1);
12656 12699 %}
12657 12700
12658 12701 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd (eFlagsRegU / cmpOpU).
12659 12702 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12660 12703 match(CountedLoopEnd cop cmp);
12661 12704 effect(USE labl);
12662 12705
12663 12706 ins_cost(300);
12664 12707 format %{ "J$cop,u $labl\t# Loop end" %}
12665 12708 size(6);
12666 12709 opcode(0x0F, 0x80);
12667 12710 ins_encode( Jcc( cop, labl) );
12668 12711 ins_pipe( pipe_jcc );
12669 12712 ins_pc_relative(1);
12670 12713 %}
12671 12714
// UCF (unordered-compare-flags) variant; slightly cheaper cost so the
// matcher prefers it when the flags class permits.
12672 12715 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12673 12716 match(CountedLoopEnd cop cmp);
12674 12717 effect(USE labl);
12675 12718
12676 12719 ins_cost(200);
12677 12720 format %{ "J$cop,u $labl\t# Loop end" %}
12678 12721 size(6);
12679 12722 opcode(0x0F, 0x80);
12680 12723 ins_encode( Jcc( cop, labl) );
12681 12724 ins_pipe( pipe_jcc );
12682 12725 ins_pc_relative(1);
12683 12726 %}
12684 12727
12685 12728 // Jump Direct Conditional - using unsigned comparison
12686 12729 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12687 12730 match(If cop cmp);
12688 12731 effect(USE labl);
12689 12732
12690 12733 ins_cost(300);
12691 12734 format %{ "J$cop,u $labl" %}
12692 12735 size(6);
12693 12736 opcode(0x0F, 0x80);
12694 12737 ins_encode(Jcc(cop, labl));
12695 12738 ins_pipe(pipe_jcc);
12696 12739 ins_pc_relative(1);
12697 12740 %}
12698 12741
12699 12742 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12700 12743 match(If cop cmp);
12701 12744 effect(USE labl);
12702 12745
12703 12746 ins_cost(200);
12704 12747 format %{ "J$cop,u $labl" %}
12705 12748 size(6);
12706 12749 opcode(0x0F, 0x80);
12707 12750 ins_encode(Jcc(cop, labl));
12708 12751 ins_pipe(pipe_jcc);
12709 12752 ins_pc_relative(1);
12710 12753 %}
12711 12754
// Branch on float compare flags where PF must be consulted: an unordered
// result sets the parity flag. For NE, an unordered compare must branch
// to the target (JP,u $labl then Jcc); for EQ, unordered must NOT branch,
// so JP skips over the Jcc (parity_disp = 6, the size of one long Jcc).
// Emits two 6-byte Jcc instructions — hence size(12).
12712 12755 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12713 12756 match(If cop cmp);
12714 12757 effect(USE labl);
12715 12758
12716 12759 ins_cost(200);
12717 12760 format %{ $$template
12718 12761 if ($cop$$cmpcode == Assembler::notEqual) {
12719 12762 $$emit$$"JP,u $labl\n\t"
12720 12763 $$emit$$"J$cop,u $labl"
12721 12764 } else {
12722 12765 $$emit$$"JP,u done\n\t"
12723 12766 $$emit$$"J$cop,u $labl\n\t"
12724 12767 $$emit$$"done:"
12725 12768 }
12726 12769 %}
12727 12770 size(12);
12728 12771 opcode(0x0F, 0x80);
12729 12772 ins_encode %{
12730 12773 Label* l = $labl$$label;
// NOTE(review): local 'ok' is assigned but never read — dead code,
// candidate for removal in a cleanup change.
12731 12774 $$$emit8$primary;
12732 12775 emit_cc(cbuf, $secondary, Assembler::parity);
12733 12776 int parity_disp = -1;
12734 12777 bool ok = false;
12735 12778 if ($cop$$cmpcode == Assembler::notEqual) {
12736 12779 // the two jumps are 6 bytes apart, so their displacements to the shared target differ by 6 as well
12737 12780 parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12738 12781 } else if ($cop$$cmpcode == Assembler::equal) {
12739 12782 parity_disp = 6;
12740 12783 ok = true;
12741 12784 } else {
12742 12785 ShouldNotReachHere();
12743 12786 }
12744 12787 emit_d32(cbuf, parity_disp);
12745 12788 $$$emit8$primary;
12746 12789 emit_cc(cbuf, $secondary, $cop$$cmpcode);
12747 12790 int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12748 12791 emit_d32(cbuf, disp);
12749 12792 %}
12750 12793 ins_pipe(pipe_jcc);
12751 12794 ins_pc_relative(1);
12752 12795 %}
12753 12796
12754 12797 // ============================================================================
12755 12798 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12756 12799 // array for an instance of the superklass. Set a hidden internal cache on a
12757 12800 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12758 12801 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Both variants share enc_PartialSubtypeCheck; the opcode byte below acts
// as a flag telling the encoder whether to XOR EDI to produce a register
// result (0x1 here) or only set flags (0x0 in the _vs_Zero form).
12759 12802 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12760 12803 match(Set result (PartialSubtypeCheck sub super));
12761 12804 effect( KILL rcx, KILL cr );
12762 12805
12763 12806 ins_cost(1100); // slightly larger than the next version
12764 12807 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12765 12808 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12766 12809 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12767 12810 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12768 12811 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
12769 12812 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12770 12813 "XOR $result,$result\t\t Hit: EDI zero\n\t"
12771 12814 "miss:\t" %}
12772 12815
12773 12816 opcode(0x1); // Force a XOR of EDI
12774 12817 ins_encode( enc_PartialSubtypeCheck() );
12775 12818 ins_pipe( pipe_slow );
12776 12819 %}
12777 12820
// Flags-only variant: used when the result feeds directly into a
// compare-with-null, so the register result is not needed (cheaper cost).
12778 12821 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12779 12822 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12780 12823 effect( KILL rcx, KILL result );
12781 12824
12782 12825 ins_cost(1000);
12783 12826 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12784 12827 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12785 12828 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12786 12829 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12787 12830 "JNE,s miss\t\t# Missed: flags NZ\n\t"
12788 12831 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12789 12832 "miss:\t" %}
12790 12833
12791 12834 opcode(0x0); // No need to XOR EDI
12792 12835 ins_encode( enc_PartialSubtypeCheck() );
12793 12836 ins_pipe( pipe_slow );
12794 12837 %}
12795 12838
12796 12839 // ============================================================================
12797 12840 // Branch Instructions -- short offset versions
12798 12841 //
12799 12842 // These instructions are used to replace jumps of a long offset (the default
12800 12843 // match) with jumps of a shorter offset. These instructions are all tagged
12801 12844 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12802 12845 // match rules in general matching. Instead, the ADLC generates a conversion
12803 12846 // method in the MachNode which can be used to do in-place replacement of the
12804 12847 // long variant with the shorter variant. The compiler will determine if a
12805 12848 // branch can be taken by the is_short_branch_offset() predicate in the machine
12806 12849 // specific code section of the file.
12807 12850
12808 12851 // Jump Direct - Label defines a relative address from JMP+1
// EB rel8: 2-byte short jump, replaces the 5-byte jmpDir when in range.
12809 12852 instruct jmpDir_short(label labl) %{
12810 12853 match(Goto);
12811 12854 effect(USE labl);
12812 12855
12813 12856 ins_cost(300);
12814 12857 format %{ "JMP,s $labl" %}
12815 12858 size(2);
12816 12859 opcode(0xEB);
12817 12860 ins_encode( OpcP, LblShort( labl ) );
12818 12861 ins_pipe( pipe_jmp );
12819 12862 ins_pc_relative(1);
12820 12863 ins_short_branch(1);
12821 12864 %}
12822 12865
12823 12866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// 7x rel8: 2-byte short conditional jump, replaces the 6-byte jmpCon.
12824 12867 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12825 12868 match(If cop cr);
12826 12869 effect(USE labl);
12827 12870
12828 12871 ins_cost(300);
12829 12872 format %{ "J$cop,s $labl" %}
12830 12873 size(2);
12831 12874 opcode(0x70);
12832 12875 ins_encode( JccShort( cop, labl) );
12833 12876 ins_pipe( pipe_jcc );
12834 12877 ins_pc_relative(1);
12835 12878 ins_short_branch(1);
12836 12879 %}
12837 12880
12838 12881 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12839 12882 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12840 12883 match(CountedLoopEnd cop cr);
12841 12884 effect(USE labl);
12842 12885
12843 12886 ins_cost(300);
12844 12887 format %{ "J$cop,s $labl\t# Loop end" %}
12845 12888 size(2);
12846 12889 opcode(0x70);
12847 12890 ins_encode( JccShort( cop, labl) );
12848 12891 ins_pipe( pipe_jcc );
12849 12892 ins_pc_relative(1);
12850 12893 ins_short_branch(1);
12851 12894 %}
12852 12895
12853 12896 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12854 12897 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12855 12898 match(CountedLoopEnd cop cmp);
12856 12899 effect(USE labl);
12857 12900
12858 12901 ins_cost(300);
12859 12902 format %{ "J$cop,us $labl\t# Loop end" %}
12860 12903 size(2);
12861 12904 opcode(0x70);
12862 12905 ins_encode( JccShort( cop, labl) );
12863 12906 ins_pipe( pipe_jcc );
12864 12907 ins_pc_relative(1);
12865 12908 ins_short_branch(1);
12866 12909 %}
12867 12910
12868 12911 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12869 12912 match(CountedLoopEnd cop cmp);
12870 12913 effect(USE labl);
12871 12914
12872 12915 ins_cost(300);
12873 12916 format %{ "J$cop,us $labl\t# Loop end" %}
12874 12917 size(2);
12875 12918 opcode(0x70);
12876 12919 ins_encode( JccShort( cop, labl) );
12877 12920 ins_pipe( pipe_jcc );
12878 12921 ins_pc_relative(1);
12879 12922 ins_short_branch(1);
12880 12923 %}
12881 12924
12882 12925 // Jump Direct Conditional - using unsigned comparison
12883 12926 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12884 12927 match(If cop cmp);
12885 12928 effect(USE labl);
12886 12929
12887 12930 ins_cost(300);
12888 12931 format %{ "J$cop,us $labl" %}
12889 12932 size(2);
12890 12933 opcode(0x70);
12891 12934 ins_encode( JccShort( cop, labl) );
12892 12935 ins_pipe( pipe_jcc );
12893 12936 ins_pc_relative(1);
12894 12937 ins_short_branch(1);
12895 12938 %}
12896 12939
12897 12940 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12898 12941 match(If cop cmp);
12899 12942 effect(USE labl);
12900 12943
12901 12944 ins_cost(300);
12902 12945 format %{ "J$cop,us $labl" %}
12903 12946 size(2);
12904 12947 opcode(0x70);
12905 12948 ins_encode( JccShort( cop, labl) );
12906 12949 ins_pipe( pipe_jcc );
12907 12950 ins_pc_relative(1);
12908 12951 ins_short_branch(1);
12909 12952 %}
12910 12953
// Short (rel8) version of jmpConUCF2: two 2-byte Jcc's — size(4). Same
// parity-flag logic as the long form: NE branches to the label on
// unordered; EQ skips the second jump (parity_disp = 2, one short Jcc).
// The trailing asserts verify both displacements fit in a signed byte.
12911 12954 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12912 12955 match(If cop cmp);
12913 12956 effect(USE labl);
12914 12957
12915 12958 ins_cost(300);
12916 12959 format %{ $$template
12917 12960 if ($cop$$cmpcode == Assembler::notEqual) {
12918 12961 $$emit$$"JP,u,s $labl\n\t"
12919 12962 $$emit$$"J$cop,u,s $labl"
12920 12963 } else {
12921 12964 $$emit$$"JP,u,s done\n\t"
12922 12965 $$emit$$"J$cop,u,s $labl\n\t"
12923 12966 $$emit$$"done:"
12924 12967 }
12925 12968 %}
12926 12969 size(4);
12927 12970 opcode(0x70);
12928 12971 ins_encode %{
12929 12972 Label* l = $labl$$label;
12930 12973 emit_cc(cbuf, $primary, Assembler::parity);
12931 12974 int parity_disp = -1;
12932 12975 if ($cop$$cmpcode == Assembler::notEqual) {
12933 12976 parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12934 12977 } else if ($cop$$cmpcode == Assembler::equal) {
12935 12978 parity_disp = 2;
12936 12979 } else {
12937 12980 ShouldNotReachHere();
12938 12981 }
12939 12982 emit_d8(cbuf, parity_disp);
12940 12983 emit_cc(cbuf, $primary, $cop$$cmpcode);
12941 12984 int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12942 12985 emit_d8(cbuf, disp);
12943 12986 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12944 12987 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12945 12988 %}
12946 12989 ins_pipe(pipe_jcc);
12947 12990 ins_pc_relative(1);
12948 12991 ins_short_branch(1);
12949 12992 %}
12950 12993
12951 12994 // ============================================================================
12952 12995 // Long Compare
12953 12996 //
12954 12997 // Currently we hold longs in 2 registers. Comparing such values efficiently
12955 12998 // is tricky. The flavor of compare used depends on whether we are testing
12956 12999 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12957 13000 // The GE test is the negated LT test. The LE test can be had by commuting
12958 13001 // the operands (yielding a GE test) and then negating; negate again for the
12959 13002 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12960 13003 // NE test is negated from that.
12961 13004
12962 13005 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12963 13006 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12964 13007 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12965 13008 // are collapsed internally in the ADLC's dfa-gen code. The match for
12966 13009 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12967 13010 // foo match ends up with the wrong leaf. One fix is to not match both
12968 13011 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12969 13012 // both forms beat the trinary form of long-compare and both are very useful
12970 13013 // on Intel which has so few registers.
12971 13014
12972 13015 // Manifest a CmpL result in an integer register. Very painful.
12973 13016 // This is the test to avoid.
// Produces -1/0/+1 in dst. High halves are compared signed (jcc less /
// greater); if they are equal, the low halves are compared UNSIGNED
// (jcc below) — correct 64-bit semantics for a two-register long.
12974 13017 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12975 13018 match(Set dst (CmpL3 src1 src2));
12976 13019 effect( KILL flags );
12977 13020 ins_cost(1000);
12978 13021 format %{ "XOR $dst,$dst\n\t"
12979 13022 "CMP $src1.hi,$src2.hi\n\t"
12980 13023 "JLT,s m_one\n\t"
12981 13024 "JGT,s p_one\n\t"
12982 13025 "CMP $src1.lo,$src2.lo\n\t"
12983 13026 "JB,s m_one\n\t"
12984 13027 "JEQ,s done\n"
12985 13028 "p_one:\tINC $dst\n\t"
12986 13029 "JMP,s done\n"
12987 13030 "m_one:\tDEC $dst\n"
12988 13031 "done:" %}
12989 13032 ins_encode %{
12990 13033 Label p_one, m_one, done;
12991 13034 __ xorptr($dst$$Register, $dst$$Register);
12992 13035 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12993 13036 __ jccb(Assembler::less, m_one);
12994 13037 __ jccb(Assembler::greater, p_one);
12995 13038 __ cmpl($src1$$Register, $src2$$Register);
12996 13039 __ jccb(Assembler::below, m_one);
12997 13040 __ jccb(Assembler::equal, done);
12998 13041 __ bind(p_one);
12999 13042 __ incrementl($dst$$Register);
13000 13043 __ jmpb(done);
13001 13044 __ bind(m_one);
13002 13045 __ decrementl($dst$$Register);
13003 13046 __ bind(done);
13004 13047 %}
13005 13048 ins_pipe( pipe_slow );
13006 13049 %}
13007 13050
13008 13051 //======
13009 13052 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13010 13053 // compares. Can be used for LE or GT compares by reversing arguments.
13011 13054 // NOT GOOD FOR EQ/NE tests.
// Compare-to-zero: for LT/GE only the sign of the high word matters, so
// a TEST of src.hi against itself suffices.
13012 13055 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13013 13056 match( Set flags (CmpL src zero ));
13014 13057 ins_cost(100);
13015 13058 format %{ "TEST $src.hi,$src.hi" %}
13016 13059 opcode(0x85);
13017 13060 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13018 13061 ins_pipe( ialu_cr_reg_reg );
13019 13062 %}
13020 13063
13021 13064 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13022 13065 // compares. Can be used for LE or GT compares by reversing arguments.
13023 13066 // NOT GOOD FOR EQ/NE tests.
// CMP on the low words generates a borrow which SBB folds into the
// high-word difference (computed in tmp so src1 is preserved), leaving
// the sign flag of the full 64-bit subtraction in EFLAGS.
13024 13067 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13025 13068 match( Set flags (CmpL src1 src2 ));
13026 13069 effect( TEMP tmp );
13027 13070 ins_cost(300);
13028 13071 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13029 13072 "MOV $tmp,$src1.hi\n\t"
13030 13073 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13031 13074 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13032 13075 ins_pipe( ialu_cr_reg_reg );
13033 13076 %}
13034 13077
13035 13078 // Long compares reg < zero/req OR reg >= zero/req.
13036 13079 // Just a wrapper for a normal branch, plus the predicate test.
// Predicate restricts this expansion to BoolTest::lt / ge, matching the
// LTGE-only flag semantics produced above.
13037 13080 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13038 13081 match(If cmp flags);
13039 13082 effect(USE labl);
13040 13083 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13041 13084 expand %{
13042 13085 jmpCon(cmp,flags,labl); // JLT or JGE...
13043 13086 %}
13044 13087 %}
13045 13088
13046 13089 // Compare 2 longs and CMOVE longs.
// Two CMOVcc's move both halves of the 64-bit value; predicate requires
// CMOV hardware support and restricts the Bool test to lt/ge (the only
// tests the LTGE flags encode correctly).
13047 13090 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13048 13091 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13049 13092 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13050 13093 ins_cost(400);
13051 13094 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13052 13095 "CMOV$cmp $dst.hi,$src.hi" %}
13053 13096 opcode(0x0F,0x40);
13054 13097 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13055 13098 ins_pipe( pipe_cmov_reg_long );
13056 13099 %}
13057 13100
// Memory-source variant: CMOV's directly from the long's two memory words.
13058 13101 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13059 13102 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13060 13103 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13061 13104 ins_cost(500);
13062 13105 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13063 13106 "CMOV$cmp $dst.hi,$src.hi" %}
13064 13107 opcode(0x0F,0x40);
13065 13108 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13066 13109 ins_pipe( pipe_cmov_reg_long );
13067 13110 %}
13068 13111
13069 13112 // Compare 2 longs and CMOVE ints.
13070 13113 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13071 13114 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13072 13115 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13073 13116 ins_cost(200);
13074 13117 format %{ "CMOV$cmp $dst,$src" %}
13075 13118 opcode(0x0F,0x40);
13076 13119 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13077 13120 ins_pipe( pipe_cmov_reg );
13078 13121 %}
13079 13122
13080 13123 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13081 13124 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13082 13125 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13083 13126 ins_cost(250);
13084 13127 format %{ "CMOV$cmp $dst,$src" %}
13085 13128 opcode(0x0F,0x40);
13086 13129 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13087 13130 ins_pipe( pipe_cmov_mem );
13088 13131 %}
13089 13132
13090 13133 // Compare 2 longs and CMOVE ptrs.
13091 13134 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13092 13135 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13093 13136 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13094 13137 ins_cost(200);
13095 13138 format %{ "CMOV$cmp $dst,$src" %}
13096 13139 opcode(0x0F,0x40);
13097 13140 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13098 13141 ins_pipe( pipe_cmov_reg );
13099 13142 %}
13100 13143
13101 13144 // Compare 2 longs and CMOVE doubles
13102 13145 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
// CMoveD on long-compare LTGE flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms — '&&' binds tighter than '||', so the 'ge' arm previously matched
// regardless of UseSSE. Matches the explicit parens in cmovLL_reg_LTGE.
13103 13146 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13104 13147 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13105 13148 ins_cost(200);
13106 13149 expand %{
13107 13150 fcmovD_regS(cmp,flags,dst,src);
13108 13151 %}
13109 13152 %}
13110 13153
13111 13154 // Compare 2 longs and CMOVE doubles
13112 13155 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
// CMoveD on long-compare LTGE flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'); otherwise this rule and cmovDD_reg_LTGE
// both matched for 'ge' at any UseSSE level.
13113 13156 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13114 13157 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13115 13158 ins_cost(200);
13116 13159 expand %{
13117 13160 fcmovXD_regS(cmp,flags,dst,src);
13118 13161 %}
13119 13162 %}
13120 13163
13121 13164 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
// CMoveF on long-compare LTGE flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13122 13165 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13123 13166 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13124 13167 ins_cost(200);
13125 13168 expand %{
13126 13169 fcmovF_regS(cmp,flags,dst,src);
13127 13170 %}
13128 13171 %}
13129 13172
13130 13173 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
// CMoveF on long-compare LTGE flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13131 13174 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13132 13175 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13133 13176 ins_cost(200);
13134 13177 expand %{
13135 13178 fcmovX_regS(cmp,flags,dst,src);
13136 13179 %}
13137 13180 %}
13138 13181
13139 13182 //======
13140 13183 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13141 13184 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13142 13185 match( Set flags (CmpL src zero ));
13143 13186 effect(TEMP tmp);
13144 13187 ins_cost(200);
13145 13188 format %{ "MOV $tmp,$src.lo\n\t"
13146 13189 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13147 13190 ins_encode( long_cmp_flags0( src, tmp ) ); // lo|hi == 0 iff the whole long is zero; sets ZF accordingly
13148 13191 ins_pipe( ialu_reg_reg_long );
13149 13192 %}
13150 13193
13151 13194 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13152 13195 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13153 13196 match( Set flags (CmpL src1 src2 ));
13154 13197 ins_cost(200+300); // two compares plus a short branch in the worst case
13155 13198 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13156 13199 "JNE,s skip\n\t"
13157 13200 "CMP $src1.hi,$src2.hi\n\t"
13158 13201 "skip:\t" %}
13159 13202 ins_encode( long_cmp_flags1( src1, src2 ) ); // ZF valid for EQ/NE only: hi compare is skipped once the lo halves already differ
13160 13203 ins_pipe( ialu_cr_reg_reg );
13161 13204 %}
13162 13205
13163 13206 // Long compare reg == zero/reg OR reg != zero/reg
13164 13207 // Just a wrapper for a normal branch, plus the predicate test.
13165 13208 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13166 13209 match(If cmp flags);
13167 13210 effect(USE labl);
13168 13211 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // EQNE flags are only meaningful for eq/ne tests
13169 13212 expand %{
13170 13213 jmpCon(cmp,flags,labl); // JEQ or JNE...
13171 13214 %}
13172 13215 %}
13173 13216
13174 13217 // Compare 2 longs and CMOVE longs.
13175 13218 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13176 13219 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13177 13220 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178 13221 ins_cost(400);
13179 13222 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13180 13223 "CMOV$cmp $dst.hi,$src.hi" %}
13181 13224 opcode(0x0F,0x40);
13182 13225 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13183 13226 ins_pipe( pipe_cmov_reg_long );
13184 13227 %}
13185 13228
13186 13229 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13187 13230 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13188 13231 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13189 13232 ins_cost(500);
13190 13233 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13191 13234 "CMOV$cmp $dst.hi,$src.hi" %}
13192 13235 opcode(0x0F,0x40);
13193 13236 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13194 13237 ins_pipe( pipe_cmov_reg_long );
13195 13238 %}
13196 13239
13197 13240 // Compare 2 longs and CMOVE ints.
13198 13241 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13199 13242 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13200 13243 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13201 13244 ins_cost(200);
13202 13245 format %{ "CMOV$cmp $dst,$src" %}
13203 13246 opcode(0x0F,0x40);
13204 13247 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13205 13248 ins_pipe( pipe_cmov_reg );
13206 13249 %}
13207 13250
13208 13251 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13209 13252 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13210 13253 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13211 13254 ins_cost(250);
13212 13255 format %{ "CMOV$cmp $dst,$src" %}
13213 13256 opcode(0x0F,0x40);
13214 13257 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13215 13258 ins_pipe( pipe_cmov_mem );
13216 13259 %}
13217 13260
13218 13261 // Compare 2 longs and CMOVE ptrs.
13219 13262 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13220 13263 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13221 13264 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13222 13265 ins_cost(200);
13223 13266 format %{ "CMOV$cmp $dst,$src" %}
13224 13267 opcode(0x0F,0x40);
13225 13268 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13226 13269 ins_pipe( pipe_cmov_reg );
13227 13270 %}
13228 13271
13229 13272 // Compare 2 longs and CMOVE doubles
13230 13273 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
// CMoveD on long-compare EQNE flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'), matching cmovLL_reg_EQNE.
13231 13274 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13232 13275 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13233 13276 ins_cost(200);
13234 13277 expand %{
13235 13278 fcmovD_regS(cmp,flags,dst,src);
13236 13279 %}
13237 13280 %}
13238 13281
13239 13282 // Compare 2 longs and CMOVE doubles
13240 13283 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
// CMoveD on long-compare EQNE flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13241 13284 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13242 13285 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13243 13286 ins_cost(200);
13244 13287 expand %{
13245 13288 fcmovXD_regS(cmp,flags,dst,src);
13246 13289 %}
13247 13290 %}
13248 13291
13249 13292 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
// CMoveF on long-compare EQNE flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13250 13293 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13251 13294 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13252 13295 ins_cost(200);
13253 13296 expand %{
13254 13297 fcmovF_regS(cmp,flags,dst,src);
13255 13298 %}
13256 13299 %}
13257 13300
13258 13301 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// CMoveF on long-compare EQNE flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13259 13302 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13260 13303 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13261 13304 ins_cost(200);
13262 13305 expand %{
13263 13306 fcmovX_regS(cmp,flags,dst,src);
13264 13307 %}
13265 13308 %}
13266 13309
13267 13310 //======
13268 13311 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13269 13312 // Same as cmpL_reg_flags_LEGT except must negate src
13270 13313 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13271 13314 match( Set flags (CmpL src zero ));
13272 13315 effect( TEMP tmp );
13273 13316 ins_cost(300);
13274 13317 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13275 13318 "CMP $tmp,$src.lo\n\t"
13276 13319 "SBB $tmp,$src.hi\n\t" %}
13277 13320 ins_encode( long_cmp_flags3(src, tmp) ); // computes the flags of 0 - src, i.e. the operand-swapped compare; consume with a commuted test
13278 13321 ins_pipe( ialu_reg_reg_long );
13279 13322 %}
13280 13323
13281 13324 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13282 13325 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13283 13326 // requires a commuted test to get the same result.
13284 13327 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13285 13328 match( Set flags (CmpL src1 src2 ));
13286 13329 effect( TEMP tmp ); // SBB clobbers its destination; high-word subtract uses a scratch reg
13287 13330 ins_cost(300);
13288 13331 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13289 13332 "MOV $tmp,$src2.hi\n\t"
13290 13333 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13291 13334 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); // note src2/src1 order: flags describe src2 - src1, hence the commuted test
13292 13335 ins_pipe( ialu_cr_reg_reg );
13293 13336 %}
13294 13337
13295 13338 // Long compares reg < zero/reg OR reg >= zero/reg.
13296 13339 // Just a wrapper for a normal branch, plus the predicate test.
13297 13340 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13298 13341 match(If cmp flags);
13299 13342 effect(USE labl);
13300 13343 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); // gt/le only; the flags hold the swapped compare, cmpOp_commute emits the commuted condition
13301 13344 ins_cost(300);
13302 13345 expand %{
13303 13346 jmpCon(cmp,flags,labl); // JGT or JLE...
13304 13347 %}
13305 13348 %}
13306 13349
13307 13350 // Compare 2 longs and CMOVE longs.
13308 13351 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13309 13352 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13310 13353 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13311 13354 ins_cost(400);
13312 13355 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13313 13356 "CMOV$cmp $dst.hi,$src.hi" %}
13314 13357 opcode(0x0F,0x40);
13315 13358 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13316 13359 ins_pipe( pipe_cmov_reg_long );
13317 13360 %}
13318 13361
13319 13362 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13320 13363 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13321 13364 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13322 13365 ins_cost(500);
13323 13366 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13324 13367 "CMOV$cmp $dst.hi,$src.hi+4" %}
13325 13368 opcode(0x0F,0x40);
13326 13369 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13327 13370 ins_pipe( pipe_cmov_reg_long );
13328 13371 %}
13329 13372
13330 13373 // Compare 2 longs and CMOVE ints.
13331 13374 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13332 13375 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13333 13376 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13334 13377 ins_cost(200);
13335 13378 format %{ "CMOV$cmp $dst,$src" %}
13336 13379 opcode(0x0F,0x40);
13337 13380 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13338 13381 ins_pipe( pipe_cmov_reg );
13339 13382 %}
13340 13383
13341 13384 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13342 13385 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13343 13386 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13344 13387 ins_cost(250);
13345 13388 format %{ "CMOV$cmp $dst,$src" %}
13346 13389 opcode(0x0F,0x40);
13347 13390 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13348 13391 ins_pipe( pipe_cmov_mem );
13349 13392 %}
13350 13393
13351 13394 // Compare 2 longs and CMOVE ptrs.
13352 13395 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13353 13396 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13354 13397 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13355 13398 ins_cost(200);
13356 13399 format %{ "CMOV$cmp $dst,$src" %}
13357 13400 opcode(0x0F,0x40);
13358 13401 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13359 13402 ins_pipe( pipe_cmov_reg );
13360 13403 %}
13361 13404
13362 13405 // Compare 2 longs and CMOVE doubles
13363 13406 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// CMoveD on long-compare LEGT flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'), matching cmovLL_reg_LEGT.
13364 13407 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13365 13408 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13366 13409 ins_cost(200);
13367 13410 expand %{
13368 13411 fcmovD_regS(cmp,flags,dst,src);
13369 13412 %}
13370 13413 %}
13371 13414
13372 13415 // Compare 2 longs and CMOVE doubles
13373 13416 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// CMoveD on long-compare LEGT flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13374 13417 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13375 13418 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13376 13419 ins_cost(200);
13377 13420 expand %{
13378 13421 fcmovXD_regS(cmp,flags,dst,src);
13379 13422 %}
13380 13423 %}
13381 13424
13382 13425 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// CMoveF on long-compare LEGT flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13383 13426 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13384 13427 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13385 13428 ins_cost(200);
13386 13429 expand %{
13387 13430 fcmovF_regS(cmp,flags,dst,src);
13388 13431 %}
13389 13432 %}
13390 13433
13391 13434
13392 13435 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// CMoveF on long-compare LEGT flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13393 13436 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13394 13437 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13395 13438 ins_cost(200);
13396 13439 expand %{
13397 13440 fcmovX_regS(cmp,flags,dst,src);
13398 13441 %}
↓ open down ↓ |
11600 lines elided |
↑ open up ↑ |
13399 13442 %}
13400 13443
13401 13444
13402 13445 // ============================================================================
13403 13446 // Procedure Call/Return Instructions
13404 13447 // Call Java Static Instruction
13405 13448 // Note: If this code changes, the corresponding ret_addr_offset() and
13406 13449 // compute_padding() functions will have to be adjusted.
13407 13450 instruct CallStaticJavaDirect(method meth) %{
13408 13451 match(CallStaticJava);
13452 + predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke()); // ordinary static calls only; MethodHandle invokes take the CallStaticJavaHandle rule
13409 13453 effect(USE meth);
13410 13454 
13411 13455 ins_cost(300);
13412 13456 format %{ "CALL,static " %}
13413 13457 opcode(0xE8); /* E8 cd */
13414 13458 ins_encode( pre_call_FPU,
13415 13459 Java_Static_Call( meth ),
13416 13460 call_epilog,
13417 13461 post_call_FPU );
13418 13462 ins_pipe( pipe_slow );
13419 13463 ins_pc_relative(1);
13420 13464 ins_alignment(4);
13421 13465 %}
13422 13466
13467 +// Call Java Static Instruction (method handle version)
13468 +// Note: If this code changes, the corresponding ret_addr_offset() and
13469 +// compute_padding() functions will have to be adjusted.
13470 +instruct CallStaticJavaHandle(method meth, eBPRegP ebp) %{
13471 + match(CallStaticJava);
13472 + predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); // counterpart of CallStaticJavaDirect's negated predicate
13473 + effect(USE meth);
13474 + // EBP is saved by all callees (for interpreter stack correction).
13475 + // We use it here for a similar purpose, in {preserve,restore}_SP.
13476 +
13477 + ins_cost(300);
13478 + format %{ "CALL,static/MethodHandle " %}
13479 + opcode(0xE8); /* E8 cd */
13480 + ins_encode( pre_call_FPU,
13481 + preserve_SP, // capture SP in EBP before the call (see comment above)
13482 + Java_Static_Call( meth ),
13483 + restore_SP, // recover SP from EBP — NOTE(review): presumably MH adapters may move SP across the call; confirm against the encoding definitions
13484 + call_epilog,
13485 + post_call_FPU );
13486 + ins_pipe( pipe_slow );
13487 + ins_pc_relative(1);
13488 + ins_alignment(4);
13489 +%}
13490 +
13423 13491 // Call Java Dynamic Instruction
13424 13492 // Note: If this code changes, the corresponding ret_addr_offset() and
13425 13493 // compute_padding() functions will have to be adjusted.
13426 13494 instruct CallDynamicJavaDirect(method meth) %{
13427 13495 match(CallDynamicJava);
13428 13496 effect(USE meth);
13429 13497
13430 13498 ins_cost(300);
13431 13499 format %{ "MOV EAX,(oop)-1\n\t"
13432 13500 "CALL,dynamic" %}
13433 13501 opcode(0xE8); /* E8 cd */
13434 13502 ins_encode( pre_call_FPU,
13435 13503 Java_Dynamic_Call( meth ),
13436 13504 call_epilog,
13437 13505 post_call_FPU );
13438 13506 ins_pipe( pipe_slow );
13439 13507 ins_pc_relative(1);
13440 13508 ins_alignment(4);
13441 13509 %}
13442 13510
13443 13511 // Call Runtime Instruction
13444 13512 instruct CallRuntimeDirect(method meth) %{
13445 13513 match(CallRuntime );
13446 13514 effect(USE meth);
13447 13515
13448 13516 ins_cost(300);
13449 13517 format %{ "CALL,runtime " %}
13450 13518 opcode(0xE8); /* E8 cd */
13451 13519 // Use FFREEs to clear entries in float stack
13452 13520 ins_encode( pre_call_FPU,
13453 13521 FFree_Float_Stack_All,
13454 13522 Java_To_Runtime( meth ),
13455 13523 post_call_FPU );
13456 13524 ins_pipe( pipe_slow );
13457 13525 ins_pc_relative(1);
13458 13526 %}
13459 13527
13460 13528 // Call runtime without safepoint
13461 13529 instruct CallLeafDirect(method meth) %{
13462 13530 match(CallLeaf);
13463 13531 effect(USE meth);
13464 13532
13465 13533 ins_cost(300);
13466 13534 format %{ "CALL_LEAF,runtime " %}
13467 13535 opcode(0xE8); /* E8 cd */
13468 13536 ins_encode( pre_call_FPU,
13469 13537 FFree_Float_Stack_All,
13470 13538 Java_To_Runtime( meth ),
13471 13539 Verify_FPU_For_Leaf, post_call_FPU );
13472 13540 ins_pipe( pipe_slow );
13473 13541 ins_pc_relative(1);
13474 13542 %}
13475 13543
13476 13544 instruct CallLeafNoFPDirect(method meth) %{
13477 13545 match(CallLeafNoFP);
13478 13546 effect(USE meth);
13479 13547
13480 13548 ins_cost(300);
13481 13549 format %{ "CALL_LEAF_NOFP,runtime " %}
13482 13550 opcode(0xE8); /* E8 cd */
13483 13551 ins_encode(Java_To_Runtime(meth));
13484 13552 ins_pipe( pipe_slow );
13485 13553 ins_pc_relative(1);
13486 13554 %}
13487 13555
13488 13556
13489 13557 // Return Instruction
13490 13558 // Remove the return address & jump to it.
13491 13559 instruct Ret() %{
13492 13560 match(Return);
13493 13561 format %{ "RET" %}
13494 13562 opcode(0xC3); // near return: pops the return address and jumps to it
13495 13563 ins_encode(OpcP);
13496 13564 ins_pipe( pipe_jmp );
13497 13565 %}
13498 13566
13499 13567 // Tail Call; Jump from runtime stub to Java code.
13500 13568 // Also known as an 'interprocedural jump'.
13501 13569 // Target of jump will eventually return to caller.
13502 13570 // TailJump below removes the return address.
13503 13571 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13504 13572 match(TailCall jump_target method_oop );
13505 13573 ins_cost(300);
13506 13574 format %{ "JMP $jump_target \t# EBX holds method oop" %}
13507 13575 opcode(0xFF, 0x4); /* Opcode FF /4 */
13508 13576 ins_encode( OpcP, RegOpc(jump_target) );
13509 13577 ins_pipe( pipe_jmp );
13510 13578 %}
13511 13579
13512 13580
13513 13581 // Tail Jump; remove the return address; jump to target.
13514 13582 // TailCall above leaves the return address around.
13515 13583 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13516 13584 match( TailJump jump_target ex_oop );
13517 13585 ins_cost(300);
13518 13586 format %{ "POP EDX\t# pop return address into dummy\n\t"
13519 13587 "JMP $jump_target " %}
13520 13588 opcode(0xFF, 0x4); /* Opcode FF /4 */
13521 13589 ins_encode( enc_pop_rdx,
13522 13590 OpcP, RegOpc(jump_target) );
13523 13591 ins_pipe( pipe_jmp );
13524 13592 %}
13525 13593
13526 13594 // Create exception oop: created by stack-crawling runtime code.
13527 13595 // Created exception is now available to this handler, and is setup
13528 13596 // just prior to jumping to this handler. No code emitted.
13529 13597 instruct CreateException( eAXRegP ex_oop )
13530 13598 %{
13531 13599 match(Set ex_oop (CreateEx));
13532 13600
13533 13601 size(0);
13534 13602 // use the following format syntax
13535 13603 format %{ "# exception oop is in EAX; no code emitted" %}
13536 13604 ins_encode();
13537 13605 ins_pipe( empty );
13538 13606 %}
13539 13607
13540 13608
13541 13609 // Rethrow exception:
13542 13610 // The exception oop will come in the first argument position.
13543 13611 // Then JUMP (not call) to the rethrow stub code.
13544 13612 instruct RethrowException()
13545 13613 %{
13546 13614 match(Rethrow);
13547 13615
13548 13616 // use the following format syntax
13549 13617 format %{ "JMP rethrow_stub" %}
13550 13618 ins_encode(enc_rethrow);
13551 13619 ins_pipe( pipe_jmp );
13552 13620 %}
13553 13621
13554 13622 // inlined locking and unlocking
13555 13623
13556 13624
13557 13625 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13558 13626 match( Set cr (FastLock object box) );
13559 13627 effect( TEMP tmp, TEMP scr ); // tmp is pinned to EAX by its operand class — NOTE(review): presumably needed by CMPXCHG inside Fast_Lock; scr is a plain scratch reg
13560 13628 ins_cost(300);
13561 13629 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13562 13630 ins_encode( Fast_Lock(object,box,tmp,scr) );
13563 13631 ins_pipe( pipe_slow );
13564 13632 ins_pc_relative(1);
13565 13633 %}
13566 13634
13567 13635 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13568 13636 match( Set cr (FastUnlock object box) );
13569 13637 effect( TEMP tmp ); // box is pinned to EAX by its operand class; tmp is scratch for Fast_Unlock
13570 13638 ins_cost(300);
13571 13639 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13572 13640 ins_encode( Fast_Unlock(object,box,tmp) );
13573 13641 ins_pipe( pipe_slow );
13574 13642 ins_pc_relative(1);
13575 13643 %}
13576 13644
13577 13645
13578 13646
13579 13647 // ============================================================================
13580 13648 // Safepoint Instruction
13581 13649 instruct safePoint_poll(eFlagsReg cr) %{
13582 13650 match(SafePoint);
13583 13651 effect(KILL cr);
13584 13652 
13585 13653 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13586 13654 // On SPARC that might be acceptable as we can generate the address with
13587 13655 // just a sethi, saving an or. By polling at offset 0 we can end up
13588 13656 // putting additional pressure on the index-0 in the D$. Because of
13589 13657 // alignment (just like the situation at hand) the lower indices tend
13590 13658 // to see more traffic. It'd be better to change the polling address
13591 13659 // to offset 0 of the last $line in the polling page.
13592 13660 
13593 13661 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13594 13662 ins_cost(125);
13595 13663 size(6) ; // NOTE(review): fixed 6-byte size — must stay in sync with what Safepoint_Poll() emits
13596 13664 ins_encode( Safepoint_Poll() );
13597 13665 ins_pipe( ialu_reg_mem );
13598 13666 %}
13599 13667
13600 13668 //----------PEEPHOLE RULES-----------------------------------------------------
13601 13669 // These must follow all instruction definitions as they use the names
13602 13670 // defined in the instructions definitions.
13603 13671 //
13604 13672 // peepmatch ( root_instr_name [preceding_instruction]* );
13605 13673 //
13606 13674 // peepconstraint %{
13607 13675 // (instruction_number.operand_name relational_op instruction_number.operand_name
13608 13676 // [, ...] );
13609 13677 // // instruction numbers are zero-based using left to right order in peepmatch
13610 13678 //
13611 13679 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13612 13680 // // provide an instruction_number.operand_name for each operand that appears
13613 13681 // // in the replacement instruction's match rule
13614 13682 //
13615 13683 // ---------VM FLAGS---------------------------------------------------------
13616 13684 //
13617 13685 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13618 13686 //
13619 13687 // Each peephole rule is given an identifying number starting with zero and
13620 13688 // increasing by one in the order seen by the parser. An individual peephole
13621 13689 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13622 13690 // on the command-line.
13623 13691 //
13624 13692 // ---------CURRENT LIMITATIONS----------------------------------------------
13625 13693 //
13626 13694 // Only match adjacent instructions in same basic block
13627 13695 // Only equality constraints
13628 13696 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13629 13697 // Only one replacement instruction
13630 13698 //
13631 13699 // ---------EXAMPLE----------------------------------------------------------
13632 13700 //
13633 13701 // // pertinent parts of existing instructions in architecture description
13634 13702 // instruct movI(eRegI dst, eRegI src) %{
13635 13703 // match(Set dst (CopyI src));
13636 13704 // %}
13637 13705 //
13638 13706 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13639 13707 // match(Set dst (AddI dst src));
13640 13708 // effect(KILL cr);
13641 13709 // %}
13642 13710 //
13643 13711 // // Change (inc mov) to lea
13644 13712 // peephole %{
13645 13713 // // increment preceeded by register-register move
13646 13714 // peepmatch ( incI_eReg movI );
13647 13715 // // require that the destination register of the increment
13648 13716 // // match the destination register of the move
13649 13717 // peepconstraint ( 0.dst == 1.dst );
13650 13718 // // construct a replacement instruction that sets
13651 13719 // // the destination to ( move's source register + one )
13652 13720 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13653 13721 // %}
13654 13722 //
13655 13723 // Implementation no longer uses movX instructions since
13656 13724 // machine-independent system no longer uses CopyX nodes.
13657 13725 //
13658 13726 // peephole %{
13659 13727 // peepmatch ( incI_eReg movI );
13660 13728 // peepconstraint ( 0.dst == 1.dst );
13661 13729 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13662 13730 // %}
13663 13731 //
13664 13732 // peephole %{
13665 13733 // peepmatch ( decI_eReg movI );
13666 13734 // peepconstraint ( 0.dst == 1.dst );
13667 13735 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13668 13736 // %}
13669 13737 //
13670 13738 // peephole %{
13671 13739 // peepmatch ( addI_eReg_imm movI );
13672 13740 // peepconstraint ( 0.dst == 1.dst );
13673 13741 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13674 13742 // %}
13675 13743 //
13676 13744 // peephole %{
13677 13745 // peepmatch ( addP_eReg_imm movP );
13678 13746 // peepconstraint ( 0.dst == 1.dst );
13679 13747 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13680 13748 // %}
13681 13749
13682 13750 // // Change load of spilled value to only a spill
13683 13751 // instruct storeI(memory mem, eRegI src) %{
13684 13752 // match(Set mem (StoreI mem src));
13685 13753 // %}
13686 13754 //
13687 13755 // instruct loadI(eRegI dst, memory mem) %{
13688 13756 // match(Set dst (LoadI mem));
13689 13757 // %}
13690 13758 //
13691 13759 peephole %{
13692 13760 peepmatch ( loadI storeI ); // a load immediately following a store of the same register to the same slot
13693 13761 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13694 13762 peepreplace ( storeI( 1.mem 1.mem 1.src ) ); // keep only the store; the loaded value is already in the register
13695 13763 %}
13696 13764
13697 13765 //----------SMARTSPILL RULES---------------------------------------------------
13698 13766 // These must follow all instruction definitions as they use the names
13699 13767 // defined in the instructions definitions.
↓ open down ↓ |
267 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX