Print this page
rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 // or visit www.oracle.com if you need additional information or have any
21 21 // questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // architecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok, so here's the trick: FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instruction out of Intel's FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source_hpp %{
239 239 // Must be visible to the DFA in dfa_x86_32.cpp
240 240 extern bool is_operand_hi32_zero(Node* n);
241 241 %}
242 242
243 243 source %{
244 244 #define RELOC_IMM32 Assembler::imm_operand
245 245 #define RELOC_DISP32 Assembler::disp32_operand
246 246
247 247 #define __ _masm.
248 248
249 249 // How to find the high register of a Long pair, given the low register
250 250 #define HIGH_FROM_LOW(x) ((x)+2)
251 251
252 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 254 // fast versions of NegF/NegD and AbsF/AbsD.
255 255
256 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
257 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 259 // of 128-bits operands for SSE instructions.
260 260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 261 // Store the value to a 128-bits operand.
262 262 operand[0] = lo;
263 263 operand[1] = hi;
264 264 return operand;
265 265 }
266 266
267 267 // Buffer for 128-bits masks used by SSE instructions.
268 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269 269
270 270 // Static initialization during VM startup.
271 271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275 275
276 276 // Offset hacking within calls.
277 277 static int pre_call_FPU_size() {
278 278 if (Compile::current()->in_24_bit_fp_mode())
279 279 return 6; // fldcw
280 280 return 0;
281 281 }
282 282
283 283 static int preserve_SP_size() {
284 284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 285 }
286 286
287 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 288 // from the start of the call to the point where the return address
289 289 // will point.
290 290 int MachCallStaticJavaNode::ret_addr_offset() {
291 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 292 if (_method_handle_invoke)
293 293 offset += preserve_SP_size();
294 294 return offset;
295 295 }
296 296
297 297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 299 }
300 300
301 301 static int sizeof_FFree_Float_Stack_All = -1;
302 302
303 303 int MachCallRuntimeNode::ret_addr_offset() {
304 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306 306 }
307 307
308 308 // Indicate if the safepoint node needs the polling page as an input.
309 309 // Since x86 does have absolute addressing, it doesn't.
310 310 bool SafePointNode::needs_polling_address_input() {
311 311 return false;
312 312 }
313 313
314 314 //
315 315 // Compute padding required for nodes which need alignment
316 316 //
317 317
318 318 // The address of the call instruction needs to be 4-byte aligned to
319 319 // ensure that it does not span a cache line so that it can be patched.
320 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 322 current_offset += 1; // skip call opcode byte
323 323 return round_to(current_offset, alignment_required()) - current_offset;
324 324 }
325 325
326 326 // The address of the call instruction needs to be 4-byte aligned to
327 327 // ensure that it does not span a cache line so that it can be patched.
328 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 331 current_offset += 1; // skip call opcode byte
332 332 return round_to(current_offset, alignment_required()) - current_offset;
333 333 }
334 334
335 335 // The address of the call instruction needs to be 4-byte aligned to
336 336 // ensure that it does not span a cache line so that it can be patched.
337 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 339 current_offset += 5; // skip MOV instruction
340 340 current_offset += 1; // skip call opcode byte
341 341 return round_to(current_offset, alignment_required()) - current_offset;
342 342 }
343 343
344 344 #ifndef PRODUCT
345 345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
346 346 st->print("INT3");
347 347 }
348 348 #endif
349 349
350 350 // EMIT_RM()
351 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
352 352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
353 353 cbuf.insts()->emit_int8(c);
354 354 }
355 355
356 356 // EMIT_CC()
357 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
358 358 unsigned char c = (unsigned char)( f1 | f2 );
359 359 cbuf.insts()->emit_int8(c);
360 360 }
361 361
362 362 // EMIT_OPCODE()
363 363 void emit_opcode(CodeBuffer &cbuf, int code) {
364 364 cbuf.insts()->emit_int8((unsigned char) code);
365 365 }
366 366
367 367 // EMIT_OPCODE() w/ relocation information
368 368 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
369 369 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
370 370 emit_opcode(cbuf, code);
371 371 }
372 372
373 373 // EMIT_D8()
374 374 void emit_d8(CodeBuffer &cbuf, int d8) {
375 375 cbuf.insts()->emit_int8((unsigned char) d8);
376 376 }
377 377
378 378 // EMIT_D16()
379 379 void emit_d16(CodeBuffer &cbuf, int d16) {
380 380 cbuf.insts()->emit_int16(d16);
381 381 }
382 382
383 383 // EMIT_D32()
384 384 void emit_d32(CodeBuffer &cbuf, int d32) {
385 385 cbuf.insts()->emit_int32(d32);
386 386 }
387 387
388 388 // emit 32 bit value and construct relocation entry from relocInfo::relocType
389 389 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
390 390 int format) {
391 391 cbuf.relocate(cbuf.insts_mark(), reloc, format);
392 392 cbuf.insts()->emit_int32(d32);
393 393 }
394 394
395 395 // emit 32 bit value and construct relocation entry from RelocationHolder
396 396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
397 397 int format) {
398 398 #ifdef ASSERT
399 399 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
400 400 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
401 401 }
402 402 #endif
403 403 cbuf.relocate(cbuf.insts_mark(), rspec, format);
404 404 cbuf.insts()->emit_int32(d32);
405 405 }
406 406
407 407 // Access stack slot for load or store
408 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
409 409 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
410 410 if( -128 <= disp && disp <= 127 ) {
411 411 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
412 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
413 413 emit_d8 (cbuf, disp); // Displacement // R/M byte
414 414 } else {
415 415 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
416 416 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
417 417 emit_d32(cbuf, disp); // Displacement // R/M byte
418 418 }
419 419 }
420 420
421 421 // eRegI ereg, memory mem) %{ // emit_reg_mem
422 422 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
423 423 // There is no index & no scale, use form without SIB byte
424 424 if ((index == 0x4) &&
425 425 (scale == 0) && (base != ESP_enc)) {
426 426 // If no displacement, mode is 0x0; unless base is [EBP]
427 427 if ( (displace == 0) && (base != EBP_enc) ) {
428 428 emit_rm(cbuf, 0x0, reg_encoding, base);
429 429 }
430 430 else { // If 8-bit displacement, mode 0x1
431 431 if ((displace >= -128) && (displace <= 127)
432 432 && !(displace_is_oop) ) {
433 433 emit_rm(cbuf, 0x1, reg_encoding, base);
434 434 emit_d8(cbuf, displace);
435 435 }
436 436 else { // If 32-bit displacement
437 437 if (base == -1) { // Special flag for absolute address
438 438 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
439 439 // (manual lies; no SIB needed here)
440 440 if ( displace_is_oop ) {
441 441 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
442 442 } else {
443 443 emit_d32 (cbuf, displace);
444 444 }
445 445 }
446 446 else { // Normal base + offset
447 447 emit_rm(cbuf, 0x2, reg_encoding, base);
448 448 if ( displace_is_oop ) {
449 449 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
450 450 } else {
451 451 emit_d32 (cbuf, displace);
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457 else { // Else, encode with the SIB byte
458 458 // If no displacement, mode is 0x0; unless base is [EBP]
459 459 if (displace == 0 && (base != EBP_enc)) { // If no displacement
460 460 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
461 461 emit_rm(cbuf, scale, index, base);
462 462 }
463 463 else { // If 8-bit displacement, mode 0x1
464 464 if ((displace >= -128) && (displace <= 127)
465 465 && !(displace_is_oop) ) {
466 466 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
467 467 emit_rm(cbuf, scale, index, base);
468 468 emit_d8(cbuf, displace);
469 469 }
470 470 else { // If 32-bit displacement
471 471 if (base == 0x04 ) {
472 472 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
473 473 emit_rm(cbuf, scale, index, 0x04);
474 474 } else {
475 475 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
476 476 emit_rm(cbuf, scale, index, base);
477 477 }
478 478 if ( displace_is_oop ) {
479 479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 480 } else {
481 481 emit_d32 (cbuf, displace);
482 482 }
483 483 }
484 484 }
485 485 }
486 486 }
487 487
488 488
489 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 490 if( dst_encoding == src_encoding ) {
491 491 // reg-reg copy, use an empty encoding
492 492 } else {
493 493 emit_opcode( cbuf, 0x8B );
494 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 495 }
496 496 }
497 497
498 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 499 if( dst_encoding == src_encoding ) {
↓ open down ↓ |
499 lines elided |
↑ open up ↑ |
500 500 // reg-reg copy, use an empty encoding
501 501 } else {
502 502 MacroAssembler _masm(&cbuf);
503 503
504 504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 505 }
506 506 }
507 507
508 508
509 509 //=============================================================================
510 +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511 +
512 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
513 + emit_constant_table(cbuf);
514 + set_table_base_offset(0);
515 + // Empty encoding
516 +}
517 +
518 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
519 + // Compute the size (even if it's zero) since
520 + // Compile::Shorten_branches needs the table to be emitted (which
521 + // happens in Compile::scratch_emit_size) to calculate the size for
522 + // MachConstantNode's.
523 + return MachNode::size(ra_);
524 +}
525 +
526 +#ifndef PRODUCT
527 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
528 + st->print("# MachConstantBaseNode (empty encoding)");
529 +}
530 +#endif
531 +
532 +
533 +//=============================================================================
510 534 #ifndef PRODUCT
511 535 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
512 536 Compile* C = ra_->C;
513 537 if( C->in_24_bit_fp_mode() ) {
514 538 st->print("FLDCW 24 bit fpu control word");
515 539 st->print_cr(""); st->print("\t");
516 540 }
517 541
518 542 int framesize = C->frame_slots() << LogBytesPerInt;
519 543 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
520 544 // Remove two words for return addr and rbp,
521 545 framesize -= 2*wordSize;
522 546
523 547 // Calls to C2R adapters often do not accept exceptional returns.
524 548 // We require that their callers must bang for them. But be careful, because
525 549 // some VM calls (such as call site linkage) can use several kilobytes of
526 550 // stack. But the stack safety zone should account for that.
527 551 // See bugs 4446381, 4468289, 4497237.
528 552 if (C->need_stack_bang(framesize)) {
529 553 st->print_cr("# stack bang"); st->print("\t");
530 554 }
531 555 st->print_cr("PUSHL EBP"); st->print("\t");
532 556
533 557 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
534 558 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
535 559 st->print_cr(""); st->print("\t");
536 560 framesize -= wordSize;
537 561 }
538 562
539 563 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
540 564 if (framesize) {
541 565 st->print("SUB ESP,%d\t# Create frame",framesize);
542 566 }
543 567 } else {
544 568 st->print("SUB ESP,%d\t# Create frame",framesize);
545 569 }
546 570 }
547 571 #endif
548 572
549 573
550 574 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
551 575 Compile* C = ra_->C;
552 576
553 577 if (UseSSE >= 2 && VerifyFPU) {
554 578 MacroAssembler masm(&cbuf);
555 579 masm.verify_FPU(0, "FPU stack must be clean on entry");
556 580 }
557 581
558 582 // WARNING: Initial instruction MUST be 5 bytes or longer so that
559 583 // NativeJump::patch_verified_entry will be able to patch out the entry
560 584 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
561 585 // depth is ok at 5 bytes, the frame allocation can be either 3 or
562 586 // 6 bytes. So if we don't do the fldcw or the push then we must
563 587 // use the 6 byte frame allocation even if we have no frame. :-(
564 588 // If method sets FPU control word do it now
565 589 if( C->in_24_bit_fp_mode() ) {
566 590 MacroAssembler masm(&cbuf);
567 591 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
568 592 }
569 593
570 594 int framesize = C->frame_slots() << LogBytesPerInt;
571 595 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
572 596 // Remove two words for return addr and rbp,
573 597 framesize -= 2*wordSize;
574 598
575 599 // Calls to C2R adapters often do not accept exceptional returns.
576 600 // We require that their callers must bang for them. But be careful, because
577 601 // some VM calls (such as call site linkage) can use several kilobytes of
578 602 // stack. But the stack safety zone should account for that.
579 603 // See bugs 4446381, 4468289, 4497237.
580 604 if (C->need_stack_bang(framesize)) {
581 605 MacroAssembler masm(&cbuf);
582 606 masm.generate_stack_overflow_check(framesize);
583 607 }
584 608
585 609 // We always push rbp, so that on return to interpreter rbp, will be
586 610 // restored correctly and we can correct the stack.
587 611 emit_opcode(cbuf, 0x50 | EBP_enc);
588 612
589 613 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
590 614 emit_opcode(cbuf, 0x68); // push 0xbadb100d
591 615 emit_d32(cbuf, 0xbadb100d);
592 616 framesize -= wordSize;
593 617 }
594 618
595 619 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
596 620 if (framesize) {
597 621 emit_opcode(cbuf, 0x83); // sub SP,#framesize
598 622 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
599 623 emit_d8(cbuf, framesize);
600 624 }
601 625 } else {
602 626 emit_opcode(cbuf, 0x81); // sub SP,#framesize
603 627 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
604 628 emit_d32(cbuf, framesize);
605 629 }
606 630 C->set_frame_complete(cbuf.insts_size());
607 631
608 632 #ifdef ASSERT
609 633 if (VerifyStackAtCalls) {
610 634 Label L;
611 635 MacroAssembler masm(&cbuf);
612 636 masm.push(rax);
613 637 masm.mov(rax, rsp);
614 638 masm.andptr(rax, StackAlignmentInBytes-1);
615 639 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
616 640 masm.pop(rax);
617 641 masm.jcc(Assembler::equal, L);
618 642 masm.stop("Stack is not properly aligned!");
619 643 masm.bind(L);
620 644 }
621 645 #endif
622 646
623 647 }
624 648
625 649 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
626 650 return MachNode::size(ra_); // too many variables; just compute it the hard way
627 651 }
628 652
629 653 int MachPrologNode::reloc() const {
630 654 return 0; // a large enough number
631 655 }
632 656
633 657 //=============================================================================
634 658 #ifndef PRODUCT
635 659 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
636 660 Compile *C = ra_->C;
637 661 int framesize = C->frame_slots() << LogBytesPerInt;
638 662 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
639 663 // Remove two words for return addr and rbp,
640 664 framesize -= 2*wordSize;
641 665
642 666 if( C->in_24_bit_fp_mode() ) {
643 667 st->print("FLDCW standard control word");
644 668 st->cr(); st->print("\t");
645 669 }
646 670 if( framesize ) {
647 671 st->print("ADD ESP,%d\t# Destroy frame",framesize);
648 672 st->cr(); st->print("\t");
649 673 }
650 674 st->print_cr("POPL EBP"); st->print("\t");
651 675 if( do_polling() && C->is_method_compilation() ) {
652 676 st->print("TEST PollPage,EAX\t! Poll Safepoint");
653 677 st->cr(); st->print("\t");
654 678 }
655 679 }
656 680 #endif
657 681
// Emit the method epilog: restore the FPU control word if the method ran in
// 24-bit precision mode, tear down the frame, pop saved EBP, and (for normal
// method compilations) emit the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize  (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize  (shorter imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl ebp

  if( do_polling() && C->is_method_compilation() ) {
    // Safepoint poll: TEST EAX,[polling page]; the relocation lets the VM
    // find and arm/disarm this poll at the return site.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}
694 718
695 719 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
696 720 Compile *C = ra_->C;
697 721 // If method set FPU control word, restore to standard control word
698 722 int size = C->in_24_bit_fp_mode() ? 6 : 0;
699 723 if( do_polling() && C->is_method_compilation() ) size += 6;
700 724
701 725 int framesize = C->frame_slots() << LogBytesPerInt;
702 726 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
703 727 // Remove two words for return addr and rbp,
704 728 framesize -= 2*wordSize;
705 729
706 730 size++; // popl rbp,
707 731
708 732 if( framesize >= 128 ) {
709 733 size += 6;
710 734 } else {
711 735 size += framesize ? 3 : 0;
712 736 }
713 737 return size;
714 738 }
715 739
// Upper bound on relocation entries this node contributes.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Offset of the safepoint within this node; 0 here — the poll site is
// identified by the relocation emitted in emit(), not by this offset.
int MachEpilogNode::safepoint_offset() const { return 0; }
725 749
726 750 //=============================================================================
727 751
// Register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Classify an allocator register name into one of the RC buckets.
// The validity check must precede the stack check.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;        // general-purpose register
  if (r->is_FloatRegister()) {                // x87 stack register
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
743 767
// Emit (cbuf != NULL), print (cbuf == NULL, !do_size), or just size a single
// reg <-> [ESP+offset] instruction.  Returns the accumulated size so calls
// can be chained:  size = impl_helper(..., size, st);
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + modrm + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
763 787
764 788 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emits/prints/sizes an XMM <-> [ESP+offset] move.  A double move is detected
// by reg_lo+1 == reg_hi; single moves use MOVSS, double moves MOVSD or
// MOVLPD depending on UseXmmLoadAndClearUpper.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );   // movss prefix
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0x0F + opcode + modrm + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
802 826
803 827
// Emit/print/size an XMM-to-XMM register move.  With UseXmmRegToRegMoveAll
// the MOVAPS/MOVAPD forms are used, otherwise MOVSS/MOVSD.  Double moves are
// detected by adjacent hi/lo register pairs.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 );   // MOVAPD prefix for double
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size+4;
  }
}
844 868
// Move a 32-bit general-purpose register into an XMM register (MOVD,
// 66 0F 6E /r).
// NOTE(review): unlike the other impl_* helpers this returns a fixed 4 rather
// than size+4; the single caller reaches it with size == 0, but verify before
// reusing it elsewhere.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
860 884
861 885
// Move an XMM register into a 32-bit general-purpose register (MOVD,
// 66 0F 7E /r).  Mirror of impl_movgpr2x_helper above, including the fixed
// return of 4 — see the note there.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
877 901
// Emit/print/size a 32-bit GPR-to-GPR move (MOV r32,r/m32: 8B /r).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + modrm
}
890 914
// Store an x87 register to [ESP+offset].  If the source is not already at
// the top of the FP stack it is first duplicated there with FLD and stored
// with the popping form (FSTP); otherwise the non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" argument to impl_helper is abused here to select the
  // store opcode's /digit field: EBX_num -> store & pop, EDX_num -> store.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
920 944
// Central spill-copy dispatcher: emits (cbuf != NULL), formats (cbuf == NULL,
// !do_size), or sizes (do_size) the move between the register-allocator
// assignments of the input and this node.  The first word is moved by the
// cascade of class checks below, then the second word (for longs/doubles) is
// handled at the bottom.  The order of the checks is significant.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so it is not clobbered by the low-word copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value through a temporary stack slot below ESP...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  // NOTE(review): control falls off here without a return; Unimplemented()
  // is presumed fatal/non-returning — confirm against utilities/debug.hpp.
  Unimplemented();
}
1127 1151
#ifndef PRODUCT
// Format, emit and size all delegate to the single implementation() above,
// distinguished by the (cbuf, do_size, st) argument combination.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1141 1165
1142 1166 //=============================================================================
#ifndef PRODUCT
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit _count one-byte (or fused multi-byte) nops for alignment padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// Size is exactly the pad count in bytes.
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1157 1181
1158 1182
1159 1183 //=============================================================================
#ifndef PRODUCT
// Pretty-print the lock-box address materialization: LEA reg,[ESP+offset].
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1167 1191
1168 1192 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1169 1193 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1170 1194 int reg = ra_->get_encode(this);
1171 1195 if( offset >= 128 ) {
1172 1196 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1173 1197 emit_rm(cbuf, 0x2, reg, 0x04);
1174 1198 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1175 1199 emit_d32(cbuf, offset);
1176 1200 }
1177 1201 else {
1178 1202 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1179 1203 emit_rm(cbuf, 0x1, reg, 0x04);
1180 1204 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1181 1205 emit_d8(cbuf, offset);
1182 1206 }
1183 1207 }
1184 1208
1185 1209 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1186 1210 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1187 1211 if( offset >= 128 ) {
1188 1212 return 7;
1189 1213 }
1190 1214 else {
1191 1215 return 4;
1192 1216 }
1193 1217 }
1194 1218
1195 1219 //=============================================================================
1196 1220
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark(); // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpretor
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpretor
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1231 1255
1232 1256 //=============================================================================
#ifndef PRODUCT
// Pretty-print the unverified entry point: inline-cache klass check plus the
// alignment NOPs that MachUEPNode::emit produces.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1243 1267
// Emit the unverified entry point: compare the expected klass (in EAX) against
// the receiver's klass (via ECX) and jump to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size of the UEP sequence; one byte smaller when OptoBreakpoint
// reserves the slot for an int3 instead of a NOP.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
1265 1289
1266 1290
1267 1291 //=============================================================================
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1276 1300
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.
// Returns the offset of the handler within the code buffer, or 0 when the
// stub section could not be expanded.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1293 1317
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The deopt handler starts out as a jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  // 5 extra bytes beyond the exception handler: the pushl of the return pc.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1302 1326
1303 1327 // Emit deopt handler code.
1304 1328 int emit_deopt_handler(CodeBuffer& cbuf) {
1305 1329
1306 1330 // Note that the code buffer's insts_mark is always relative to insts.
1307 1331 // That's why we must use the macroassembler to generate a handler.
1308 1332 MacroAssembler _masm(&cbuf);
1309 1333 address base =
1310 1334 __ start_a_stub(size_exception_handler());
1311 1335 if (base == NULL) return 0; // CodeBuffer::expand failed
1312 1336 int offset = __ offset();
↓ open down ↓ |
793 lines elided |
↑ open up ↑ |
1313 1337 InternalAddress here(__ pc());
1314 1338 __ pushptr(here.addr());
1315 1339
1316 1340 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1317 1341 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1318 1342 __ end_a_stub();
1319 1343 return offset;
1320 1344 }
1321 1345
1322 1346
1323 -static void emit_double_constant(CodeBuffer& cbuf, double x) {
1324 - int mark = cbuf.insts()->mark_off();
1325 - MacroAssembler _masm(&cbuf);
1326 - address double_address = __ double_constant(x);
1327 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1328 - emit_d32_reloc(cbuf,
1329 - (int)double_address,
1330 - internal_word_Relocation::spec(double_address),
1331 - RELOC_DISP32);
1332 -}
1333 -
1334 -static void emit_float_constant(CodeBuffer& cbuf, float x) {
1335 - int mark = cbuf.insts()->mark_off();
1336 - MacroAssembler _masm(&cbuf);
1337 - address float_address = __ float_constant(x);
1338 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1339 - emit_d32_reloc(cbuf,
1340 - (int)float_address,
1341 - internal_word_Relocation::spec(float_address),
1342 - RELOC_DISP32);
1343 -}
1344 -
1345 -
1346 1347 const bool Matcher::match_rule_supported(int opcode) {
1347 1348 if (!has_match_rule(opcode))
1348 1349 return false;
1349 1350
1350 1351 return true; // Per default match rules are supported.
1351 1352 }
1352 1353
// Convert a global register number to an FPU-stack-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1356 1357
1357 -bool is_positive_zero_float(jfloat f) {
1358 - return jint_cast(f) == jint_cast(0.0F);
1359 -}
1360 -
1361 -bool is_positive_one_float(jfloat f) {
1362 - return jint_cast(f) == jint_cast(1.0F);
1363 -}
1364 -
1365 -bool is_positive_zero_double(jdouble d) {
1366 - return jlong_cast(d) == jlong_cast(0.0);
1367 -}
1368 -
1369 -bool is_positive_one_double(jdouble d) {
1370 - return jlong_cast(d) == jlong_cast(1.0);
1371 -}
1372 -
// Query inherited from the SPARC port: "supported" just means this platform
// has a fast long-to-float conversion, which x86 does.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1377 1362
// Vector width in bytes: 8 (one MMX/XMM double-word pair) when SSE2 is
// available, otherwise vectorization is disabled.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}

// Vector ideal reg: vectors live in double registers on this port.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1387 1372
1388 1373 // Is this branch offset short enough that a short branch can be used?
1389 1374 //
1390 1375 // NOTE: If the platform does not provide any short branch variants, then
1391 1376 // this method should return false for offset 0.
1392 1377 bool Matcher::is_short_branch_offset(int rule, int offset) {
1393 1378 // the short version of jmpConUCF2 contains multiple branches,
1394 1379 // making the reach slightly less
1395 1380 if (rule == jmpConUCF2_rule)
1396 1381 return (-126 <= offset && offset <= 125);
1397 1382 return (-128 <= offset && offset <= 127);
1398 1383 }
1399 1384
// No 64-bit constant is "simple" on 32-bit x86.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1404 1389
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray (in bytes); below this a short inline
// sequence is used instead of rep stos.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;
1415 1400
// Narrow oops only exist on 64-bit VMs; this must never be called on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1420 1405
1421 1406
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1433 1418
1434 1419
// Rewrite the memory operand of a machine node that is used for an implicit
// null check, replacing it with a "win95-safe" variant where needed.  On
// Win95/98/ME a fault at a small positive address (field access off a null
// receiver through EBP-based addressing) would not be recognized — hence
// assumed rationale; verify against the AD file's win95_safe operand
// definitions.  `idx` is the input edge that holds the faulting address.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operand table until we reach the operand that owns edge `idx`.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1484 1469
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats
// (i.e. UseSSE == 0, so floats live on the x87 stack).
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1495 1480
1496 1481 // Return whether or not this register is ever used as an argument. This
1497 1482 // function is used on startup to build the trampoline stubs in generateOptoStub.
1498 1483 // Registers not mentioned will be killed by the VM call in the trampoline, and
1499 1484 // arguments in those registers not be available to the callee.
1500 1485 bool Matcher::can_be_java_arg( int reg ) {
1501 1486 if( reg == ECX_num || reg == EDX_num ) return true;
1502 1487 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1503 1488 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1504 1489 return false;
1505 1490 }
1506 1491
// A register is a spillable argument exactly when it can carry a Java
// argument (see can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1510 1495
1511 1496 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512 1497 // Use hardware integer DIV instruction when
1513 1498 // it is faster than a code which use multiply.
1514 1499 // Only when constant divisor fits into 32 bit
1515 1500 // (min_jint is excluded to get only correct
1516 1501 // positive 32 bit values from negative).
1517 1502 return VM_Version::has_fast_idiv() &&
1518 1503 (divisor == (int)divisor && divisor != min_jint);
1519 1504 }
1520 1505
// Register for DIVI projection of divmodI: x86 idiv leaves the
// quotient in EAX.
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;
}

// Register for MODI projection of divmodI: x86 idiv leaves the
// remainder in EDX.
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;
}

// Register for DIVL projection of divmodL.
// Not used on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
// Not used on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// EBP holds the saved SP across a method-handle invoke (see the
// preserve_SP/restore_SP encodings, which move rsp <-> rbp_mh_SP_save).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1546 1531
1547 1532 // Returns true if the high 32 bits of the value is known to be zero.
1548 1533 bool is_operand_hi32_zero(Node* n) {
1549 1534 int opc = n->Opcode();
1550 1535 if (opc == Op_LoadUI2L) {
1551 1536 return true;
1552 1537 }
1553 1538 if (opc == Op_AndL) {
1554 1539 Node* o2 = n->in(2);
1555 1540 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1556 1541 return true;
1557 1542 }
1558 1543 }
1559 1544 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1560 1545 return true;
1561 1546 }
1562 1547 return false;
1563 1548 }
1564 1549
1565 1550 %}
1566 1551
1567 1552 //----------ENCODING BLOCK-----------------------------------------------------
1568 1553 // This block specifies the encoding classes used by the compiler to output
1569 1554 // byte streams. Encoding classes generate functions which are called by
1570 1555 // Machine Instruction Nodes in order to generate the bit encoding of the
1571 1556 // instruction. Operands specify their base encoding interface with the
1572 1557 // interface keyword. There are currently supported four interfaces,
1573 1558 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1574 1559 // operand to generate a function which returns its register number when
1575 1560 // queried. CONST_INTER causes an operand to generate a function which
1576 1561 // returns the value of the constant when queried. MEMORY_INTER causes an
1577 1562 // operand to generate four functions which return the Base Register, the
1578 1563 // Index Register, the Scale Value, and the Offset Value of the operand when
1579 1564 // queried. COND_INTER causes an operand to generate six functions which
1580 1565 // return the encoding code (ie - encoding bits for the instruction)
1581 1566 // associated with each basic boolean condition for a conditional instruction.
1582 1567 // Instructions specify two basic values for encoding. They use the
1583 1568 // ins_encode keyword to specify their encoding class (which must be one of
1584 1569 // the class names specified in the encoding block), and they use the
1585 1570 // opcode keyword to specify, in order, their primary, secondary, and
1586 1571 // tertiary opcode. Only the opcode sections which a particular instruction
1587 1572 // needs for encoding need to be specified.
1588 1573 encode %{
1589 1574 // Build emit functions for each basic byte or larger field in the intel
1590 1575 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1591 1576 // code in the enc_class source block. Emit functions will live in the
1592 1577 // main source block for now. In future, we can generalize this by
1593 1578 // adding a syntax that specifies the sizes of fields in an order,
1594 1579 // so that the adlc can build the emit functions automagically
1595 1580
1596 1581 // Emit primary opcode
1597 1582 enc_class OpcP %{
1598 1583 emit_opcode(cbuf, $primary);
1599 1584 %}
1600 1585
1601 1586 // Emit secondary opcode
1602 1587 enc_class OpcS %{
1603 1588 emit_opcode(cbuf, $secondary);
1604 1589 %}
1605 1590
1606 1591 // Emit opcode directly
1607 1592 enc_class Opcode(immI d8) %{
1608 1593 emit_opcode(cbuf, $d8$$constant);
1609 1594 %}
1610 1595
1611 1596 enc_class SizePrefix %{
1612 1597 emit_opcode(cbuf,0x66);
1613 1598 %}
1614 1599
1615 1600 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
1616 1601 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1617 1602 %}
1618 1603
1619 1604 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
1620 1605 emit_opcode(cbuf,$opcode$$constant);
1621 1606 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1622 1607 %}
1623 1608
1624 1609 enc_class mov_r32_imm0( eRegI dst ) %{
1625 1610 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1626 1611 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
1627 1612 %}
1628 1613
1629 1614 enc_class cdq_enc %{
1630 1615 // Full implementation of Java idiv and irem; checks for
1631 1616 // special case as described in JVM spec., p.243 & p.271.
1632 1617 //
1633 1618 // normal case special case
1634 1619 //
1635 1620 // input : rax,: dividend min_int
1636 1621 // reg: divisor -1
1637 1622 //
1638 1623 // output: rax,: quotient (= rax, idiv reg) min_int
1639 1624 // rdx: remainder (= rax, irem reg) 0
1640 1625 //
1641 1626     // Code sequence:
1642 1627 //
1643 1628 // 81 F8 00 00 00 80 cmp rax,80000000h
1644 1629 // 0F 85 0B 00 00 00 jne normal_case
1645 1630 // 33 D2 xor rdx,edx
1646 1631 // 83 F9 FF cmp rcx,0FFh
1647 1632 // 0F 84 03 00 00 00 je done
1648 1633 // normal_case:
1649 1634 // 99 cdq
1650 1635 // F7 F9 idiv rax,ecx
1651 1636 // done:
1652 1637 //
1653 1638 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1654 1639 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1655 1640 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
1656 1641 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1657 1642 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1658 1643 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
1659 1644 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
1660 1645 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1661 1646 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1662 1647 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1663 1648 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
1664 1649 // normal_case:
1665 1650 emit_opcode(cbuf,0x99); // cdq
1666 1651 // idiv (note: must be emitted by the user of this rule)
1667 1652 // normal:
1668 1653 %}
1669 1654
1670 1655 // Dense encoding for older common ops
1671 1656 enc_class Opc_plus(immI opcode, eRegI reg) %{
1672 1657 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1673 1658 %}
1674 1659
1675 1660
1676 1661   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1677 1662 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1678 1663 // Check for 8-bit immediate, and set sign extend bit in opcode
1679 1664 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1680 1665 emit_opcode(cbuf, $primary | 0x02);
1681 1666 }
1682 1667 else { // If 32-bit immediate
1683 1668 emit_opcode(cbuf, $primary);
1684 1669 }
1685 1670 %}
1686 1671
1687 1672 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
1688 1673 // Emit primary opcode and set sign-extend bit
1689 1674 // Check for 8-bit immediate, and set sign extend bit in opcode
1690 1675 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1691 1676 emit_opcode(cbuf, $primary | 0x02); }
1692 1677 else { // If 32-bit immediate
1693 1678 emit_opcode(cbuf, $primary);
1694 1679 }
1695 1680 // Emit r/m byte with secondary opcode, after primary opcode.
1696 1681 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1697 1682 %}
1698 1683
1699 1684 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1700 1685 // Check for 8-bit immediate, and set sign extend bit in opcode
1701 1686 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1702 1687 $$$emit8$imm$$constant;
1703 1688 }
1704 1689 else { // If 32-bit immediate
1705 1690 // Output immediate
1706 1691 $$$emit32$imm$$constant;
1707 1692 }
1708 1693 %}
1709 1694
1710 1695 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1711 1696 // Emit primary opcode and set sign-extend bit
1712 1697 // Check for 8-bit immediate, and set sign extend bit in opcode
1713 1698 int con = (int)$imm$$constant; // Throw away top bits
1714 1699 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1715 1700 // Emit r/m byte with secondary opcode, after primary opcode.
1716 1701 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1717 1702 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1718 1703 else emit_d32(cbuf,con);
1719 1704 %}
1720 1705
1721 1706 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1722 1707 // Emit primary opcode and set sign-extend bit
1723 1708 // Check for 8-bit immediate, and set sign extend bit in opcode
1724 1709 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1725 1710 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1726 1711 // Emit r/m byte with tertiary opcode, after primary opcode.
1727 1712 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1728 1713 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1729 1714 else emit_d32(cbuf,con);
1730 1715 %}
1731 1716
1732 1717 enc_class Lbl (label labl) %{ // JMP, CALL
1733 1718 Label *l = $labl$$label;
1734 1719 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1735 1720 %}
1736 1721
1737 1722 enc_class LblShort (label labl) %{ // JMP, CALL
1738 1723 Label *l = $labl$$label;
1739 1724 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1740 1725 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1741 1726 emit_d8(cbuf, disp);
1742 1727 %}
1743 1728
1744 1729 enc_class OpcSReg (eRegI dst) %{ // BSWAP
1745 1730 emit_cc(cbuf, $secondary, $dst$$reg );
1746 1731 %}
1747 1732
1748 1733 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1749 1734 int destlo = $dst$$reg;
1750 1735 int desthi = HIGH_FROM_LOW(destlo);
1751 1736 // bswap lo
1752 1737 emit_opcode(cbuf, 0x0F);
1753 1738 emit_cc(cbuf, 0xC8, destlo);
1754 1739 // bswap hi
1755 1740 emit_opcode(cbuf, 0x0F);
1756 1741 emit_cc(cbuf, 0xC8, desthi);
1757 1742 // xchg lo and hi
1758 1743 emit_opcode(cbuf, 0x87);
1759 1744 emit_rm(cbuf, 0x3, destlo, desthi);
1760 1745 %}
1761 1746
1762 1747 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1763 1748 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1764 1749 %}
1765 1750
1766 1751 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
1767 1752 Label *l = $labl$$label;
1768 1753 $$$emit8$primary;
1769 1754 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1770 1755 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1771 1756 %}
1772 1757
1773 1758 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
1774 1759 Label *l = $labl$$label;
1775 1760 emit_cc(cbuf, $primary, $cop$$cmpcode);
1776 1761 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1777 1762 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1778 1763 emit_d8(cbuf, disp);
1779 1764 %}
1780 1765
1781 1766 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782 1767 $$$emit8$primary;
1783 1768 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784 1769 %}
1785 1770
1786 1771 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1787 1772 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788 1773 emit_d8(cbuf, op >> 8 );
1789 1774 emit_d8(cbuf, op & 255);
1790 1775 %}
1791 1776
1792 1777 // emulate a CMOV with a conditional branch around a MOV
1793 1778 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794 1779 // Invert sense of branch from sense of CMOV
1795 1780 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796 1781 emit_d8( cbuf, $brOffs$$constant );
1797 1782 %}
1798 1783
  // Slow-path subtype check.  Inputs: ESI = subklass, EAX = superklass;
  // ECX is clobbered; EDI receives the result.  With $primary set, the
  // hit path zeroes EDI so only the condition codes carry the answer;
  // the miss path binds past the xor -- NOTE(review): the exact result
  // convention is defined by check_klass_subtype_slow_path; confirm there.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1815 1800
1816 1801 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1817 1802 MacroAssembler masm(&cbuf);
1818 1803 int start = masm.offset();
1819 1804 if (UseSSE >= 2) {
1820 1805 if (VerifyFPU) {
1821 1806 masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822 1807 }
1823 1808 } else {
1824 1809 // External c_calling_convention expects the FPU stack to be 'clean'.
1825 1810 // Compiled code leaves it dirty. Do cleanup now.
1826 1811 masm.empty_FPU_stack();
1827 1812 }
1828 1813 if (sizeof_FFree_Float_Stack_All == -1) {
1829 1814 sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830 1815 } else {
1831 1816 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832 1817 }
1833 1818 %}
1834 1819
1835 1820 enc_class Verify_FPU_For_Leaf %{
1836 1821 if( VerifyFPU ) {
1837 1822 MacroAssembler masm(&cbuf);
1838 1823 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839 1824 }
1840 1825 %}
1841 1826
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // C calls return floats on the x87 stack; spill st(0) through a
        // 4-byte scratch slot below ESP and reload into xmm0.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Same as above with an 8-byte slot for a double.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}
1873 1858
1874 1859
1875 1860 enc_class pre_call_FPU %{
1876 1861 // If method sets FPU control word restore it here
1877 1862 debug_only(int off0 = cbuf.insts_size());
1878 1863 if( Compile::current()->in_24_bit_fp_mode() ) {
1879 1864 MacroAssembler masm(&cbuf);
1880 1865 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881 1866 }
1882 1867 debug_only(int off1 = cbuf.insts_size());
1883 1868 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
1884 1869 %}
1885 1870
1886 1871 enc_class post_call_FPU %{
1887 1872 // If method sets FPU control word do it here also
1888 1873 if( Compile::current()->in_24_bit_fp_mode() ) {
1889 1874 MacroAssembler masm(&cbuf);
1890 1875 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891 1876 }
1892 1877 %}
1893 1878
1894 1879 enc_class preserve_SP %{
1895 1880 debug_only(int off0 = cbuf.insts_size());
1896 1881 MacroAssembler _masm(&cbuf);
1897 1882 // RBP is preserved across all calls, even compiled calls.
1898 1883 // Use it to preserve RSP in places where the callee might change the SP.
1899 1884 __ movptr(rbp_mh_SP_save, rsp);
1900 1885 debug_only(int off1 = cbuf.insts_size());
1901 1886 assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
1902 1887 %}
1903 1888
1904 1889 enc_class restore_SP %{
1905 1890 MacroAssembler _masm(&cbuf);
1906 1891 __ movptr(rsp, rbp_mh_SP_save);
1907 1892 %}
1908 1893
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if ( !_method ) {
      // Runtime stub call (no Java target method): plain runtime relocation.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      // Statically bound virtual call: distinct relocation so it can be
      // patched like a virtual site.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}
1928 1913
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    // Placeholder MOV EAX, non_oop_word with an oop-immediate relocation;
    // the inline-cache machinery patches the immediate later --
    // NOTE(review): patching is done by IC code elsewhere; confirm.
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.  The relocation records the address of the
    // placeholder MOV so the two can be patched as a pair.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}
1944 1929
1945 1930 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1946 1931 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1947 1932 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1948 1933
1949 1934 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1950 1935 cbuf.set_insts_mark();
1951 1936 $$$emit8$primary;
1952 1937 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1953 1938 emit_d8(cbuf, disp); // Displacement
1954 1939
1955 1940 %}
1956 1941
1957 1942 enc_class Xor_Reg (eRegI dst) %{
1958 1943 emit_opcode(cbuf, 0x33);
1959 1944 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1960 1945 %}
1961 1946
1962 1947 // Following encoding is no longer used, but may be restored if calling
1963 1948 // convention changes significantly.
1964 1949 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1965 1950 //
1966 1951 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1967 1952 // // int ic_reg = Matcher::inline_cache_reg();
1968 1953 // // int ic_encode = Matcher::_regEncode[ic_reg];
1969 1954 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1970 1955 // // int imo_encode = Matcher::_regEncode[imo_reg];
1971 1956 //
1972 1957 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1973 1958 // // // so we load it immediately before the call
1974 1959 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1975 1960 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1976 1961 //
1977 1962 // // xor rbp,ebp
1978 1963 // emit_opcode(cbuf, 0x33);
1979 1964 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1980 1965 //
1981 1966 // // CALL to interpreter.
1982 1967 // cbuf.set_insts_mark();
1983 1968 // $$$emit8$primary;
1984 1969 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1985 1970 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1986 1971 // %}
1987 1972
1988 1973 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1989 1974 $$$emit8$primary;
1990 1975 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1991 1976 $$$emit8$shift$$constant;
1992 1977 %}
1993 1978
1994 1979 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
1995 1980 // Load immediate does not have a zero or sign extended version
1996 1981 // for 8-bit immediates
1997 1982 emit_opcode(cbuf, 0xB8 + $dst$$reg);
1998 1983 $$$emit32$src$$constant;
1999 1984 %}
2000 1985
2001 1986 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
2002 1987 // Load immediate does not have a zero or sign extended version
2003 1988 // for 8-bit immediates
2004 1989 emit_opcode(cbuf, $primary + $dst$$reg);
2005 1990 $$$emit32$src$$constant;
2006 1991 %}
2007 1992
2008 1993 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
2009 1994 // Load immediate does not have a zero or sign extended version
2010 1995 // for 8-bit immediates
2011 1996 int dst_enc = $dst$$reg;
2012 1997 int src_con = $src$$constant & 0x0FFFFFFFFL;
2013 1998 if (src_con == 0) {
2014 1999 // xor dst, dst
2015 2000 emit_opcode(cbuf, 0x33);
2016 2001 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2017 2002 } else {
2018 2003 emit_opcode(cbuf, $primary + dst_enc);
2019 2004 emit_d32(cbuf, src_con);
2020 2005 }
2021 2006 %}
2022 2007
2023 2008 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2024 2009 // Load immediate does not have a zero or sign extended version
2025 2010 // for 8-bit immediates
2026 2011 int dst_enc = $dst$$reg + 2;
2027 2012 int src_con = ((julong)($src$$constant)) >> 32;
2028 2013 if (src_con == 0) {
↓ open down ↓ |
646 lines elided |
↑ open up ↑ |
2029 2014 // xor dst, dst
2030 2015 emit_opcode(cbuf, 0x33);
2031 2016 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2032 2017 } else {
2033 2018 emit_opcode(cbuf, $primary + dst_enc);
2034 2019 emit_d32(cbuf, src_con);
2035 2020 }
2036 2021 %}
2037 2022
2038 2023
2039 - enc_class LdImmD (immD src) %{ // Load Immediate
2040 - if( is_positive_zero_double($src$$constant)) {
2041 - // FLDZ
2042 - emit_opcode(cbuf,0xD9);
2043 - emit_opcode(cbuf,0xEE);
2044 - } else if( is_positive_one_double($src$$constant)) {
2045 - // FLD1
2046 - emit_opcode(cbuf,0xD9);
2047 - emit_opcode(cbuf,0xE8);
2048 - } else {
2049 - emit_opcode(cbuf,0xDD);
2050 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2051 - emit_double_constant(cbuf, $src$$constant);
2052 - }
2053 - %}
2054 -
2055 -
2056 - enc_class LdImmF (immF src) %{ // Load Immediate
2057 - if( is_positive_zero_float($src$$constant)) {
2058 - emit_opcode(cbuf,0xD9);
2059 - emit_opcode(cbuf,0xEE);
2060 - } else if( is_positive_one_float($src$$constant)) {
2061 - emit_opcode(cbuf,0xD9);
2062 - emit_opcode(cbuf,0xE8);
2063 - } else {
2064 - $$$emit8$primary;
2065 - // Load immediate does not have a zero or sign extended version
2066 - // for 8-bit immediates
2067 - // First load to TOS, then move to dst
2068 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2069 - emit_float_constant(cbuf, $src$$constant);
2070 - }
2071 - %}
2072 -
2073 - enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
2074 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2075 - emit_float_constant(cbuf, $con$$constant);
2076 - %}
2077 -
2078 - enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
2079 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2080 - emit_double_constant(cbuf, $con$$constant);
2081 - %}
2082 -
2083 - enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2084 - // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2085 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2086 - emit_opcode(cbuf, 0x0F);
2087 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2088 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2089 - emit_double_constant(cbuf, $con$$constant);
2090 - %}
2091 -
2092 - enc_class Opc_MemImm_F(immF src) %{
2093 - cbuf.set_insts_mark();
2094 - $$$emit8$primary;
2095 - emit_rm(cbuf, 0x0, $secondary, 0x5);
2096 - emit_float_constant(cbuf, $src$$constant);
2097 - %}
2098 -
2099 -
2100 2024 enc_class MovI2X_reg(regX dst, eRegI src) %{
2101 2025 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2102 2026 emit_opcode(cbuf, 0x0F );
2103 2027 emit_opcode(cbuf, 0x6E );
2104 2028 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2105 2029 %}
2106 2030
2107 2031 enc_class MovX2I_reg(eRegI dst, regX src) %{
2108 2032 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2109 2033 emit_opcode(cbuf, 0x0F );
2110 2034 emit_opcode(cbuf, 0x7E );
2111 2035 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2112 2036 %}
2113 2037
2114 2038 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2115 2039 { // MOVD $dst,$src.lo
2116 2040 emit_opcode(cbuf,0x66);
2117 2041 emit_opcode(cbuf,0x0F);
2118 2042 emit_opcode(cbuf,0x6E);
2119 2043 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2120 2044 }
2121 2045 { // MOVD $tmp,$src.hi
2122 2046 emit_opcode(cbuf,0x66);
2123 2047 emit_opcode(cbuf,0x0F);
2124 2048 emit_opcode(cbuf,0x6E);
2125 2049 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2126 2050 }
2127 2051 { // PUNPCKLDQ $dst,$tmp
2128 2052 emit_opcode(cbuf,0x66);
2129 2053 emit_opcode(cbuf,0x0F);
2130 2054 emit_opcode(cbuf,0x62);
2131 2055 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2132 2056 }
2133 2057 %}
2134 2058
2135 2059 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2136 2060 { // MOVD $dst.lo,$src
2137 2061 emit_opcode(cbuf,0x66);
2138 2062 emit_opcode(cbuf,0x0F);
2139 2063 emit_opcode(cbuf,0x7E);
2140 2064 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2141 2065 }
2142 2066 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2143 2067 emit_opcode(cbuf,0xF2);
2144 2068 emit_opcode(cbuf,0x0F);
2145 2069 emit_opcode(cbuf,0x70);
2146 2070 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2147 2071 emit_d8(cbuf, 0x4E);
2148 2072 }
2149 2073 { // MOVD $dst.hi,$tmp
2150 2074 emit_opcode(cbuf,0x66);
2151 2075 emit_opcode(cbuf,0x0F);
2152 2076 emit_opcode(cbuf,0x7E);
2153 2077 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2154 2078 }
2155 2079 %}
2156 2080
2157 2081
2158 2082 // Encode a reg-reg copy. If it is useless, then empty encoding.
2159 2083 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2160 2084 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2161 2085 %}
2162 2086
2163 2087 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2164 2088 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2165 2089 %}
2166 2090
2167 2091 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
2168 2092 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2169 2093 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2170 2094 %}
2171 2095
2172 2096 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2173 2097 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2174 2098 %}
2175 2099
2176 2100 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2177 2101 $$$emit8$primary;
2178 2102 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2179 2103 %}
2180 2104
2181 2105 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2182 2106 $$$emit8$secondary;
2183 2107 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2184 2108 %}
2185 2109
2186 2110 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2187 2111 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2188 2112 %}
2189 2113
2190 2114 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2191 2115 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2192 2116 %}
2193 2117
2194 2118 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2195 2119 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2196 2120 %}
2197 2121
2198 2122 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2199 2123 // Output immediate
2200 2124 $$$emit32$src$$constant;
2201 2125 %}
2202 2126
2203 2127 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2204 2128 // Output Float immediate bits
2205 2129 jfloat jf = $src$$constant;
2206 2130 int jf_as_bits = jint_cast( jf );
2207 2131 emit_d32(cbuf, jf_as_bits);
2208 2132 %}
2209 2133
2210 2134 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2211 2135 // Output Float immediate bits
2212 2136 jfloat jf = $src$$constant;
2213 2137 int jf_as_bits = jint_cast( jf );
2214 2138 emit_d32(cbuf, jf_as_bits);
2215 2139 %}
2216 2140
2217 2141 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2218 2142 // Output immediate
2219 2143 $$$emit16$src$$constant;
2220 2144 %}
2221 2145
2222 2146 enc_class Con_d32(immI src) %{
2223 2147 emit_d32(cbuf,$src$$constant);
2224 2148 %}
2225 2149
2226 2150 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2227 2151 // Output immediate memory reference
2228 2152 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2229 2153 emit_d32(cbuf, 0x00);
2230 2154 %}
2231 2155
  enc_class lock_prefix( ) %{
    // Emit the 0xF0 LOCK prefix only on multi-processor systems; on UP the
    // atomicity of the following instruction is already guaranteed.
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2236 2160
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx  (0x87 /r with EBX,ECX) -- put the high word where
    // CMPXCHG8B expects it (ECX).
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock] prefix, MP only
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]  (0x0F 0xC7 /1 = CMPXCHG8B m64)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  -- restore the original register assignment
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}
2258 2182
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // Atomic 32-bit compare-and-exchange against [mem_ptr].
    // [Lock] prefix, MP only
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]  (0x0F 0xB1 /r, implicit EAX as the compare value)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2269 2193
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    // Materialize ZF into a boolean register: res = (ZF set) ? 1 : 0.
    // Uses a 5-byte skip over the second MOV (0xB8+r imm32 is 5 bytes).
    int res_encoding = $res$$reg;

    // MOV  res,0   (does not affect flags)
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail  (jump over the MOV res,1 below)
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}
2284 2208
  enc_class set_instruction_start( ) %{
    // Record the current code-buffer position so relocation info attached to
    // a following memory operand points at the start of the instruction.
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2288 2212
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    // General reg<->mem addressing-mode encoding: decompose the memory
    // operand into base/index/scale/disp and let encode_RegMem build the
    // mod/rm (+SIB, +disp) bytes.  Displacement may be an oop (needs reloc).
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2298 2222
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    // Like RegMem but addresses the HIGH 32-bit half of a long: uses the
    // high register of the pair and bumps the displacement by 4 bytes.
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2308 2232
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    // Shift a long by 1..31: a double shift (0x0F $tertiary = SHLD/SHRD imm8)
    // moves bits between the halves, then $primary/$secondary shift the
    // remaining half.  $tertiary == 0xA4 selects the left-shift operand order
    // (lo into hi); otherwise hi into lo — NOTE(review): operand-order choice
    // presumed from the 0xA4 (SHLD) test, confirm against the instructs using this.
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);              // double-shift opcode prefix
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);              // shift remaining half by the same count
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}
2321 2245
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    // Arithmetic right shift of a long by 32..63: the result's low word is the
    // old high word (shifted), and the high word becomes the sign (SAR by 31).
    emit_opcode( cbuf, 0x8B );           // Move lo <- hi
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) {          // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);              // hi = sign-extension: shift by 31
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}
2334 2258
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    // Logical shift (left or right) of a long by 32..63: move one half into
    // the other, shift it by cnt-32, and zero the vacated half.  The
    // $secondary sub-opcode selects direction (0x5 = SHR per the shift group
    // encoding — NOTE(review): presumed, confirm against the using instructs).
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B );           // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) {          // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);              // XOR r2,r2 -- clear vacated half
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2350 2274
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    // disp_for_half is 0 or 4 to select the low or high 32-bit half.
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;            // never an oop: plain double data
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2363 2287
  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;   // sub-opcode goes in the reg field
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}
2378 2302
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    // Like RMopc_Mem_no_oop but the displacement MAY be an oop (static
    // globals), so relocation info is allowed.
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();      // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}
2388 2312
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    // Encode the addressing-mode bytes of LEA dst,[src0+src1]
    // (opcode emitted elsewhere): no index, no scale, constant displacement.
    int reg_encoding = $dst$$reg;
    int base = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index = 0x04;           // 0x04 indicates no index
    int scale = 0x00;           // 0x00 indicates no scale
    int displace = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2398 2322
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // dst = min(dst, src): compare, then conditionally skip the 2-byte move
    // with a short JL (0x7C) when dst is already the smaller value.
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2410 2334
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // dst = max(dst, src): same shape as min_enc but skips with JG (0x7F).
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2422 2346
  enc_class enc_FP_store(memory mem, regD src) %{
    // Store an x87 register to memory.
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2;              // sub-opcode: plain store (FST)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;                // sub-opcode: store & pop (FSTP)
      emit_opcode( cbuf, 0xD9 );         // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2441 2365
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst  (0xF7 /3 = two's-complement negate)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}
2447 2371
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst  (0x0F 0x9C = SETL r/m8: dst's low byte = 1 if SF != OF).
    // Operand must be a byte-addressable register, hence the eCXRegI class.
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2454 2378
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    // Branch-free conditional add: p -= q; tmp = (p<q before sub) ? -1 : 0
    // via SBB tmp,tmp; then p += (tmp & y).
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  -- tmp = 0 - CF, i.e. all-ones iff the SUB borrowed
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2471 2395
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    // Same branch-free conditional add as enc_cmpLTP, but the AND mask (y)
    // comes from memory instead of a register.
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  -- all-ones iff the SUB borrowed
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2495 2419
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // Variable left shift of a long by CL (0..63).  If bit 5 of the count is
    // set (shift >= 32), pre-move lo into hi and clear lo; then the
    // SHLD/SHL pair handles the remaining 0..31 bits (x86 masks CL to 5 bits).
    // The JE skips exactly the 4 bytes of the MOV+XOR pair.
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL  $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2519 2443
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // Variable logical right shift of a long by CL: mirror of shift_left_long
    // (hi moves into lo and hi is cleared when shift >= 32), finished with
    // SHRD (0x0F 0xAD) plus SHR by CL (0xD3 /5).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2543 2467
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // Variable arithmetic right shift of a long by CL: like shift_right_long
    // but when shift >= 32 the high half becomes the sign (SAR hi,31).
    // The JE skip is 5 bytes: MOV (2) + SAR imm8 (3).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2568 2492
2569 2493
2570 2494 // ----------------- Encodings for floating point unit -----------------
2571 2495 // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    // x87 arithmetic with a stack register: $primary opcode byte, then a
    // mod/rm whose reg field is the $secondary sub-opcode.
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}
2576 2500
  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );  // FSTP ST(0): 0xDD 0xD8 discards the top of stack
    emit_d8( cbuf, 0xD8 );
  %}
2582 2506
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i): store TOS into ST(i) and pop
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2588 2512
  enc_class Push_Reg_D( regD dst ) %{
    // Push ST(i-1) onto the FPU stack (dst becomes the new TOS copy).
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
2593 2517
  enc_class strictfp_bias1( regD dst ) %{
    // Multiply dst by the first strictfp scaling constant (an 80-bit real
    // loaded from a StubRoutines address) to pre-bias against double rounding
    // of subnormals — NOTE(review): purpose inferred from the stub name,
    // confirm against StubRoutines::addr_fpu_subnormal_bias1.
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2601 2525
  enc_class strictfp_bias2( regD dst ) %{
    // Companion to strictfp_bias1: multiply dst by the second scaling
    // constant to undo the bias after the strictfp operation.
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2609 2533
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Delegates to the helper: $primary opcode, reg field = src, [ESP+disp].
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}
2614 2538
  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Opcode already emitted by a prior encoding; emit [ESP+disp32] operand:
    // mod=10 with r/m=ESP forces a SIB byte; SIB ESP/ESP means base=ESP, no index.
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}
2622 2546
  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    // $primary/$secondary carry the FILD opcode and sub-opcode.
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}
2627 2551
  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}
2632 2556
  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}
2637 2561
  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{    // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}
2642 2566
  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{    // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}
2648 2572
  enc_class Pop_Reg_F( regF dst ) %{
    // Store TOS into ST(i) and pop the FPU stack.
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2653 2577
  enc_class Push_Reg_F( regF dst ) %{
    // Duplicate ST(i-1) onto the top of the FPU stack.
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2658 2582
  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    // FST if src is already at TOS (FPR1); otherwise FLD it first and use
    // the popping form (FSTP) so stack depth is unchanged either way.
    int pop = 0x02;                      // sub-opcode: FST (store, no pop)
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // sub-opcode: FSTP (store & pop)
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}
2669 2593
  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    // Double-width twin of Pop_Mem_Reg_F (0xDD opcode instead of 0xD9).
    int pop = 0x02;                      // FST
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}
2680 2604
  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    // Reg-to-reg x87 move: FST ST(i) when src is TOS, else FLD src then
    // FSTP ST(i).  The base byte encodes FST (0xD0+i) vs FSTP (0xD8+i).
    int pop = 0xD0 - 1;                  // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2692 2616
2693 2617
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    // dst = src + src1*src2, computed on the x87 stack via MacroAssembler.
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0);   // value at TOS
    masm.fadd( $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);  // value at TOS, popped off after store
  %}
2701 2625
2702 2626
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // Load dst onto TOS, then (if src is not already FPR1) rotate the stack
    // pointer up one (fincstp), FXCH the slot holding src with the slot just
    // below the new TOS, and rotate back (fdecstp) — net effect: src sits
    // directly under the freshly pushed dst.
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2720 2644
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Move two XMM doubles onto the x87 stack (for FPREM-style ops):
    // spill each through an 8-byte scratch slot at [ESP] and FLD_D it.
    // src1 is pushed first, so src0 ends up on top.
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2744 2668
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Single-precision twin of Push_ModD_encoding: 4-byte scratch slot,
    // MOVSS spill (0xF3 0x0F 0x11) and FLD (0xD9) for each source.
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2768 2692
  enc_class Push_ResultXD(regXD dst) %{
    // Move the x87 TOS result back into an XMM register: FSTP to the scratch
    // slot at [ESP], load it into dst, then release the 8-byte slot.
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}
2782 2706
  enc_class Push_ResultX(regX dst, immI d8) %{
    // Single-precision version of Push_ResultXD; the caller supplies the
    // stack-cleanup amount (d8 = 4 or 8) as an immediate operand.
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}
2795 2719
  enc_class Push_SrcXD(regXD src) %{
    // Push one XMM double onto the x87 stack via an 8-byte [ESP] scratch
    // slot (the slot is NOT released here; see pop_stack_temp_qword).
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2810 2734
  enc_class push_stack_temp_qword() %{
    // Reserve an 8-byte scratch slot on the native stack.
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}
2816 2740
  enc_class pop_stack_temp_qword() %{
    // Release the 8-byte scratch slot reserved by push_stack_temp_qword.
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}
2822 2746
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    // Spill an XMM double to the existing [ESP] scratch slot and FLD it onto
    // the x87 stack (assumes the slot was already reserved by the caller).
    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2832 2756
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  enc_class pow_exp_core_encoding %{
    // Splits Q = Y*log2(X) into int(Q) + frac(Q); computes 2^frac(Q) with
    // F2XM1 and builds 2^int(Q) by constructing a double's exponent field in
    // integer registers, then multiplies the two.  Register names in the
    // comments below say rax/rcx/rbx but the encodings are the 32-bit
    // EAX/ECX/EBX registers.  Overflow of the biased exponent is detected
    // with the 0xFFFFF800 mask and turned into a NaN-producing pattern.
    // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2871 2795
  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
  enc_class Push_Result_Mod_D( regD src) %{
    // If src is not FPR1, rotate the x87 stack (fincstp/FXCH/fdecstp) so the
    // result under TOS ends up in src's slot; a following Pop_Reg_F or
    // Pop_Mem_F then stores/pops it.
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2892 2816
  enc_class fnstsw_sahf_skip_parity() %{
    // Copy x87 status flags into EFLAGS (FNSTSW AX + SAHF), then JNP over
    // the next 5 bytes when the comparison was ordered (no NaN).
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2903 2827
  enc_class emitModD() %{
    // x87 FPREM loop: FPREM only reduces partially, so repeat until the C2
    // status bit (reflected into PF by SAHF) says the reduction is complete.
    // fprem must be iterative
    //   :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop  (near JP with rel32 = -12, back to the FPREM)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}
2925 2849
  enc_class fpu_flags() %{
    // Move x87 compare results into EFLAGS, forcing the unordered (NaN) case
    // to look like "less than" by setting AH's carry bit before SAHF.
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400  -- C2/unordered bit of the x87 status word
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2948 2872
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2963 2887
2964 2888 // fnstsw_ax();
2965 2889 // sahf();
2966 2890 // movl(dst, nan_result);
2967 2891 // jcc(Assembler::parity, exit);
2968 2892 // movl(dst, less_result);
2969 2893 // jcc(Assembler::below, exit);
2970 2894 // movl(dst, equal_result);
2971 2895 // jcc(Assembler::equal, exit);
2972 2896 // movl(dst, greater_result);
2973 2897
2974 2898 // less_result = 1;
2975 2899 // greater_result = -1;
2976 2900 // equal_result = 0;
2977 2901 // nan_result = -1;
2978 2902
  enc_class CmpF_Result(eRegI dst) %{
    // Turn an x87 comparison into -1/0/1 in dst, mapping NaN to -1.  The
    // chained MOV-imm32 (5 bytes) + Jcc-short pairs fall through to "exit";
    // see the pseudocode comments above this enc_class for the flow.
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
3007 2931
3008 2932
3009 2933 // XMM version of CmpF_Result. Because the XMM compare
3010 2934 // instructions set the EFLAGS directly. It becomes simpler than
3011 2935 // the float version above.
3012 2936 enc_class CmpX_Result(eRegI dst) %{
3013 2937 MacroAssembler _masm(&cbuf);
3014 2938 Label nan, inc, done;
3015 2939
3016 2940 __ jccb(Assembler::parity, nan);
3017 2941 __ jccb(Assembler::equal, done);
3018 2942 __ jccb(Assembler::above, inc);
3019 2943 __ bind(nan);
3020 2944 __ decrement(as_Register($dst$$reg)); // NO L qqq
3021 2945 __ jmpb(done);
3022 2946 __ bind(inc);
3023 2947 __ increment(as_Register($dst$$reg)); // NO L qqq
3024 2948 __ bind(done);
3025 2949 %}
3026 2950
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // Compares the high words first and only falls through to the low-word
    // compare on equality.  Marked BROKEN above: the final flags do not
    // represent a full signed 64-bit comparison — do not use as-is.
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}
3041 2965
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // Sign-extend a 32-bit int into a long register pair: copy src into both
    // halves, then SAR the high half by 31 to replicate the sign bit.
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
3054 2978
// Convert a 64-bit long to a floating-point value: push hi then lo so
// the full 64-bit value lies at [ESP], FILD it onto the x87 stack, and
// repair ESP. The result is left in ST(0).
3055 2979 enc_class convert_long_double( eRegL src ) %{
3056 2980 // push $src.hi
3057 2981 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3058 2982 // push $src.lo
3059 2983 emit_opcode(cbuf, 0x50+$src$$reg );
3060 2984 // fild 64-bits at [SP]
3061 2985 emit_opcode(cbuf,0xdf); // DF /5 = FILD m64int
3062 2986 emit_d8(cbuf, 0x6C); // ModRM: disp8 + SIB
3063 2987 emit_d8(cbuf, 0x24); // SIB: base = ESP
3064 2988 emit_d8(cbuf, 0x00); // disp8 = 0
3065 2989 // pop stack
3066 2990 emit_opcode(cbuf, 0x83); // add SP, #8
3067 2991 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3068 2992 emit_d8(cbuf, 0x8);
3069 2993 %}
3070 2994
// Multiply-high by a constant power pattern: IMUL EDX:EAX by $src1,
// then arithmetic-shift EDX right by ($cnt - 32) to extract the high
// bits. When $cnt == 32 the shift count is 0 and no SAR is emitted.
3071 2995 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
3072 2996 // IMUL EDX:EAX,$src1
3073 2997 emit_opcode( cbuf, 0xF7 );
3074 2998 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); // F7 /5 = IMUL (one-operand)
3075 2999 // SAR EDX,$cnt-32
3076 3000 int shift_count = ((int)$cnt$$constant) - 32;
3077 3001 if (shift_count > 0) {
3078 3002 emit_opcode(cbuf, 0xC1);
3079 3003 emit_rm(cbuf, 0x3, 7, $dst$$reg ); // /7 = SAR
3080 3004 emit_d8(cbuf, shift_count);
3081 3005 }
3082 3006 %}
3083 3007
3084 3008 // this version doesn't have add sp, 8
// Same as convert_long_double but leaves the pushed 8 bytes on the
// stack: the instruct pattern using this encoding is responsible for
// the stack repair. Result is left in ST(0).
3085 3009 enc_class convert_long_double2( eRegL src ) %{
3086 3010 // push $src.hi
3087 3011 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3088 3012 // push $src.lo
3089 3013 emit_opcode(cbuf, 0x50+$src$$reg );
3090 3014 // fild 64-bits at [SP]
3091 3015 emit_opcode(cbuf,0xdf); // DF /5 = FILD m64int
3092 3016 emit_d8(cbuf, 0x6C); // ModRM: disp8 + SIB
3093 3017 emit_d8(cbuf, 0x24); // SIB: base = ESP
3094 3018 emit_d8(cbuf, 0x00); // disp8 = 0
3095 3019 %}
3096 3020
// Signed widening multiply: EDX:EAX = EAX * $src (one-operand IMUL,
// EAX implicit). $dst is constrained to EDX:EAX by eADXRegL.
3097 3021 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
3098 3022 // Basic idea: long = (long)int * (long)int
3099 3023 // IMUL EDX:EAX, src
3100 3024 emit_opcode( cbuf, 0xF7 );
3101 3025 emit_rm( cbuf, 0x3, 0x5, $src$$reg); // F7 /5 = IMUL
3102 3026 %}
3103 3027
// Unsigned widening multiply: EDX:EAX = EAX * $src (one-operand MUL,
// EAX implicit). $dst is constrained to EDX:EAX by eADXRegL.
3104 3028 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
3105 3029 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
3106 3030 // MUL EDX:EAX, src
3107 3031 emit_opcode( cbuf, 0xF7 );
3108 3032 emit_rm( cbuf, 0x3, 0x4, $src$$reg); // F7 /4 = MUL
3109 3033 %}
3110 3034
// Full 64x64->64 multiply. $dst is EDX:EAX (eADXRegL), so "EDX" below
// is $dst.hi and "EAX" is $dst.lo. $tmp accumulates the cross terms
// (x_hi*y_lo + x_lo*y_hi) which are then folded into the high half of
// the widening MUL of the low words.
3111 3035 enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
3112 3036 // Basic idea: lo(result) = lo(x_lo * y_lo)
3113 3037 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
3114 3038 // MOV $tmp,$src.lo
3115 3039 encode_Copy( cbuf, $tmp$$reg, $src$$reg );
3116 3040 // IMUL $tmp,EDX
3117 3041 emit_opcode( cbuf, 0x0F );
3118 3042 emit_opcode( cbuf, 0xAF ); // 0F AF = two-operand IMUL r32, r/m32
3119 3043 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3120 3044 // MOV EDX,$src.hi
3121 3045 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
3122 3046 // IMUL EDX,EAX
3123 3047 emit_opcode( cbuf, 0x0F );
3124 3048 emit_opcode( cbuf, 0xAF );
3125 3049 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
3126 3050 // ADD $tmp,EDX
3127 3051 emit_opcode( cbuf, 0x03 );
3128 3052 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3129 3053 // MUL EDX:EAX,$src.lo
3130 3054 emit_opcode( cbuf, 0xF7 ); // F7 /4 = MUL (widening, EAX implicit)
3131 3055 emit_rm( cbuf, 0x3, 0x4, $src$$reg );
3132 3056 // ADD EDX,$tmp (fold cross terms into the high half)
3133 3057 emit_opcode( cbuf, 0x03 );
3134 3058 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
3135 3059 %}
3136 3060
// 64-bit multiply by a small (8-bit immediate) constant $src.
// $dst is EDX:EAX; $tmp holds the cross term src * y_hi, which is
// added into EDX after the widening MUL of src * y_lo.
3137 3061 enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
3138 3062 // Basic idea: lo(result) = lo(src * y_lo)
3139 3063 // hi(result) = hi(src * y_lo) + lo(src * y_hi)
3140 3064 // IMUL $tmp,EDX,$src
3141 3065 emit_opcode( cbuf, 0x6B ); // 6B /r ib = IMUL r32, r/m32, imm8
3142 3066 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3143 3067 emit_d8( cbuf, (int)$src$$constant );
3144 3068 // MOV EDX,$src
3145 3069 emit_opcode(cbuf, 0xB8 + EDX_enc); // B8+r = MOV r32, imm32
3146 3070 emit_d32( cbuf, (int)$src$$constant );
3147 3071 // MUL EDX:EAX,EDX
3148 3072 emit_opcode( cbuf, 0xF7 );
3149 3073 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); // F7 /4 = MUL
3150 3074 // ADD EDX,$tmp (fold cross term into the high half)
3151 3075 emit_opcode( cbuf, 0x03 );
3152 3076 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
3153 3077 %}
3154 3078
// 64-bit signed division via a runtime call to SharedRuntime::ldiv.
// Pushes both operands (hi then lo, so each long is little-endian in
// memory), calls out, then pops the 16 bytes of arguments.
// NOTE(review): HIGH_FROM_LOW applied to the opcode byte (0x50+reg)
// works only because the macro simply offsets the register encoding --
// confirm against the macro's definition earlier in this file.
// NOTE(review): result is presumably returned in EDX:EAX per the C
// calling convention -- verify against SharedRuntime::ldiv.
3155 3079 enc_class long_div( eRegL src1, eRegL src2 ) %{
3156 3080 // PUSH src1.hi
3157 3081 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
3158 3082 // PUSH src1.lo
3159 3083 emit_opcode(cbuf, 0x50+$src1$$reg );
3160 3084 // PUSH src2.hi
3161 3085 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
3162 3086 // PUSH src2.lo
3163 3087 emit_opcode(cbuf, 0x50+$src2$$reg );
3164 3088 // CALL directly to the runtime
3165 3089 cbuf.set_insts_mark();
3166 3090 emit_opcode(cbuf,0xE8); // Call into runtime
3167 3091 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3168 3092 // Restore stack
3169 3093 emit_opcode(cbuf, 0x83); // add SP, #framesize
3170 3094 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3171 3095 emit_d8(cbuf, 4*4); // pop the four pushed words
3172 3096 %}
3173 3097
// 64-bit signed remainder via a runtime call to SharedRuntime::lrem.
// Identical calling sequence to long_div above: push both operands,
// call out, pop the 16 bytes of arguments.
3174 3098 enc_class long_mod( eRegL src1, eRegL src2 ) %{
3175 3099 // PUSH src1.hi
3176 3100 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
3177 3101 // PUSH src1.lo
3178 3102 emit_opcode(cbuf, 0x50+$src1$$reg );
3179 3103 // PUSH src2.hi
3180 3104 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
3181 3105 // PUSH src2.lo
3182 3106 emit_opcode(cbuf, 0x50+$src2$$reg );
3183 3107 // CALL directly to the runtime
3184 3108 cbuf.set_insts_mark();
3185 3109 emit_opcode(cbuf,0xE8); // Call into runtime
3186 3110 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3187 3111 // Restore stack
3188 3112 emit_opcode(cbuf, 0x83); // add SP, #framesize
3189 3113 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3190 3114 emit_d8(cbuf, 4*4); // pop the four pushed words
3191 3115 %}
3192 3116
// Test a long against zero: tmp = src.lo | src.hi, so ZF is set iff
// the full 64-bit value is zero. Clobbers $tmp.
3193 3117 enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
3194 3118 // MOV $tmp,$src.lo
3195 3119 emit_opcode(cbuf, 0x8B);
3196 3120 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
3197 3121 // OR $tmp,$src.hi
3198 3122 emit_opcode(cbuf, 0x0B);
3199 3123 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
3200 3124 %}
3201 3125
// Long equality compare: CMP the low words; if they differ, skip the
// high-word CMP so ZF stays clear. Only ZF (EQ/NE) is meaningful
// afterwards -- the ordered flags are not valid for a 64-bit compare.
3202 3126 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
3203 3127 // CMP $src1.lo,$src2.lo
3204 3128 emit_opcode( cbuf, 0x3B );
3205 3129 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3206 3130 // JNE,s skip
3207 3131 emit_cc(cbuf, 0x70, 0x5); // Jcc with cc=5 (NE), short form
3208 3132 emit_d8(cbuf,2); // skip the 2-byte CMP below
3209 3133 // CMP $src1.hi,$src2.hi
3210 3134 emit_opcode( cbuf, 0x3B );
3211 3135 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
3212 3136 %}
3213 3137
// Long ordered compare via the classic CMP/SBB idiom: compare the low
// words, then subtract-with-borrow the high words into $tmp. The
// resulting flags reflect a signed 64-bit comparison of src1 vs src2.
// Clobbers $tmp.
3214 3138 enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
3215 3139 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
3216 3140 emit_opcode( cbuf, 0x3B );
3217 3141 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3218 3142 // MOV $tmp,$src1.hi
3219 3143 emit_opcode( cbuf, 0x8B );
3220 3144 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
3221 3145 // SBB $tmp,$src2.hi\t! Compute flags for long compare
3222 3146 emit_opcode( cbuf, 0x1B );
3223 3147 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
3224 3148 %}
3225 3149
// Compare zero against a long (flags for 0 cmp $src): zero $tmp, then
// CMP/SBB against src.lo and src.hi. Same CMP/SBB idiom as
// long_cmp_flags2 with an implicit zero as the left operand.
// Clobbers $tmp.
3226 3150 enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
3227 3151 // XOR $tmp,$tmp
3228 3152 emit_opcode(cbuf,0x33); // XOR
3229 3153 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3230 3154 // CMP $tmp,$src.lo
3231 3155 emit_opcode( cbuf, 0x3B );
3232 3156 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3233 3157 // SBB $tmp,$src.hi
3234 3158 emit_opcode( cbuf, 0x1B );
3235 3159 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3236 3160 %}
3237 3161
3238 3162 // Sniff, sniff... smells like Gnu Superoptimizer
// Two's-complement negate of a 64-bit value in place: NEG both halves,
// then SBB 0 from the high half to propagate the borrow generated by
// negating a non-zero low half.
3239 3163 enc_class neg_long( eRegL dst ) %{
3240 3164 emit_opcode(cbuf,0xF7); // NEG hi
3241 3165 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); // F7 /3 = NEG
3242 3166 emit_opcode(cbuf,0xF7); // NEG lo
3243 3167 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3244 3168 emit_opcode(cbuf,0x83); // SBB hi,0
3245 3169 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); // 83 /3 = SBB imm8
3246 3170 emit_d8 (cbuf,0 );
3247 3171 %}
3248 3172
// Load 64 bits from memory into an XMM register (MOVQ xmm, m64).
3249 3173 enc_class movq_ld(regXD dst, memory mem) %{
3250 3174 MacroAssembler _masm(&cbuf);
3251 3175 __ movq($dst$$XMMRegister, $mem$$Address);
3252 3176 %}
3253 3177
// Store the low 64 bits of an XMM register to memory (MOVQ m64, xmm).
3254 3178 enc_class movq_st(memory mem, regXD src) %{
3255 3179 MacroAssembler _masm(&cbuf);
3256 3180 __ movq($mem$$Address, $src$$XMMRegister);
3257 3181 %}
3258 3182
// Replicate a byte across lanes: copy src to dst, interleave dst with
// itself (punpcklbw widens bytes to words), then broadcast the low
// word across the low quadword with pshuflw imm 0x00.
3259 3183 enc_class pshufd_8x8(regX dst, regX src) %{
3260 3184 MacroAssembler _masm(&cbuf);
3261 3185
3262 3186 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3263 3187 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3264 3188 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3265 3189 %}
3266 3190
// Broadcast the low 16-bit element of src across the low quadword of
// dst (pshuflw with shuffle immediate 0x00).
3267 3191 enc_class pshufd_4x16(regX dst, regX src) %{
3268 3192 MacroAssembler _masm(&cbuf);
3269 3193
3270 3194 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3271 3195 %}
3272 3196
// Shuffle 32-bit elements of src into dst according to the immediate
// shuffle control $mode (PSHUFD).
3273 3197 enc_class pshufd(regXD dst, regXD src, int mode) %{
3274 3198 MacroAssembler _masm(&cbuf);
3275 3199
3276 3200 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3277 3201 %}
3278 3202
// Bitwise XOR of two XMM registers (PXOR dst, src).
3279 3203 enc_class pxor(regXD dst, regXD src) %{
3280 3204 MacroAssembler _masm(&cbuf);
3281 3205
3282 3206 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3283 3207 %}
3284 3208
// Move a 32-bit GPR into the low dword of an XMM register (MOVD).
3285 3209 enc_class mov_i2x(regXD dst, eRegI src) %{
3286 3210 MacroAssembler _masm(&cbuf);
3287 3211
3288 3212 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3289 3213 %}
3290 3214
3291 3215
3292 3216 // Because the transitions from emitted code to the runtime
3293 3217 // monitorenter/exit helper stubs are so slow it's critical that
3294 3218 // we inline both the stack-locking fast-path and the inflated fast path.
3295 3219 //
3296 3220 // See also: cmpFastLock and cmpFastUnlock.
3297 3221 //
3298 3222 // What follows is a specialized inline transliteration of the code
3299 3223 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3300 3224 // another option would be to emit TrySlowEnter and TrySlowExit methods
3301 3225 // at startup-time. These methods would accept arguments as
3302 3226 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3303 3227 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3304 3228 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3305 3229 // In practice, however, the # of lock sites is bounded and is usually small.
3306 3230 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3307 3231 // if the processor uses simple bimodal branch predictors keyed by EIP
3308 3232 // Since the helper routines would be called from multiple synchronization
3309 3233 // sites.
3310 3234 //
3311 3235 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3312 3236 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3313 3237 // to those specialized methods. That'd give us a mostly platform-independent
3314 3238 // implementation that the JITs could optimize and inline at their pleasure.
3315 3239 // Done correctly, the only time we'd need to cross to native code would be
3316 3240 // to park() or unpark() threads. We'd also need a few more unsafe operators
3317 3241 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3318 3242 // (b) explicit barriers or fence operations.
3319 3243 //
3320 3244 // TODO:
3321 3245 //
3322 3246 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3323 3247 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3324 3248 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3325 3249 // the lock operators would typically be faster than reifying Self.
3326 3250 //
3327 3251 // * Ideally I'd define the primitives as:
3328 3252 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3329 3253 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3330 3254 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3331 3255 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3332 3256 // Furthermore the register assignments are overconstrained, possibly resulting in
3333 3257 // sub-optimal code near the synchronization site.
3334 3258 //
3335 3259 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3336 3260 // Alternately, use a better sp-proximity test.
3337 3261 //
3338 3262 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3339 3263 // Either one is sufficient to uniquely identify a thread.
3340 3264 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3341 3265 //
3342 3266 // * Intrinsify notify() and notifyAll() for the common cases where the
3343 3267 // object is locked by the calling thread but the waitlist is empty.
3344 3268 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3345 3269 //
3346 3270 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3347 3271 // But beware of excessive branch density on AMD Opterons.
3348 3272 //
3349 3273 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3350 3274 // or failure of the fast-path. If the fast-path fails then we pass
3351 3275 // control to the slow-path, typically in C. In Fast_Lock and
3352 3276 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3353 3277 // will emit a conditional branch immediately after the node.
3354 3278 // So we have branches to branches and lots of ICC.ZF games.
3355 3279 // Instead, it might be better to have C2 pass a "FailureLabel"
3356 3280 // into Fast_Lock and Fast_Unlock. In the case of success, control
3357 3281 // will drop through the node. ICC.ZF is undefined at exit.
3358 3282 // In the case of failure, the node will branch directly to the
3359 3283 // FailureLabel
3360 3284
3361 3285
3362 3286 // obj: object to lock
3363 3287 // box: on-stack box address (displaced header location) - KILLED
3364 3288 // rax,: tmp -- KILLED
3365 3289 // scr: tmp -- KILLED
// Emit the inlined monitor-enter fast path (see the long commentary
// above). tmp must be EAX (implicit CMPXCHG comparand); obj, box, tmp
// and scr must all be distinct -- enforced by the guarantees below.
// On exit ICC.ZF == 1 indicates success; ZF == 0 routes the caller to
// the slow path. EmitSync is a diagnostic/tuning flag whose bits
// select alternative code shapes (bit 0/1 force the slow path or the
// simple form; other bits toggle prefetch, ST-before-CAS, etc.).
3366 3290 enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
3367 3291
3368 3292 Register objReg = as_Register($obj$$reg);
3369 3293 Register boxReg = as_Register($box$$reg);
3370 3294 Register tmpReg = as_Register($tmp$$reg);
3371 3295 Register scrReg = as_Register($scr$$reg);
3372 3296
3373 3297 // Ensure the register assignments are disjoint
3374 3298 guarantee (objReg != boxReg, "") ;
3375 3299 guarantee (objReg != tmpReg, "") ;
3376 3300 guarantee (objReg != scrReg, "") ;
3377 3301 guarantee (boxReg != tmpReg, "") ;
3378 3302 guarantee (boxReg != scrReg, "") ;
3379 3303 guarantee (tmpReg == as_Register(EAX_enc), "") ;
3380 3304
3381 3305 MacroAssembler masm(&cbuf);
3382 3306
3383 3307 if (_counters != NULL) {
3384 3308 masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3385 3309 }
3386 3310 if (EmitSync & 1) {
3387 3311 // set box->dhw = unused_mark (3)
3388 3312 // Force all sync thru slow-path: slow_enter() and slow_exit()
3389 3313 masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
3390 3314 masm.cmpptr (rsp, (int32_t)0) ; // rsp != 0, so ZF=0 -> always slow path
3391 3315 } else
3392 3316 if (EmitSync & 2) {
3393 3317 Label DONE_LABEL ;
3394 3318 if (UseBiasedLocking) {
3395 3319 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3396 3320 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3397 3321 }
3398 3322
3399 3323 masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
3400 3324 masm.orptr (tmpReg, 0x1);
3401 3325 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3402 3326 if (os::is_MP()) { masm.lock(); }
3403 3327 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3404 3328 masm.jcc(Assembler::equal, DONE_LABEL);
3405 3329 // Recursive locking
3406 3330 masm.subptr(tmpReg, rsp);
3407 3331 masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3408 3332 masm.movptr(Address(boxReg, 0), tmpReg);
3409 3333 masm.bind(DONE_LABEL) ;
3410 3334 } else {
3411 3335 // Possible cases that we'll encounter in fast_lock
3412 3336 // ------------------------------------------------
3413 3337 // * Inflated
3414 3338 // -- unlocked
3415 3339 // -- Locked
3416 3340 // = by self
3417 3341 // = by other
3418 3342 // * biased
3419 3343 // -- by Self
3420 3344 // -- by other
3421 3345 // * neutral
3422 3346 // * stack-locked
3423 3347 // -- by self
3424 3348 // = sp-proximity test hits
3425 3349 // = sp-proximity test generates false-negative
3426 3350 // -- by other
3427 3351 //
3428 3352
3429 3353 Label IsInflated, DONE_LABEL, PopDone ;
3430 3354
3431 3355 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3432 3356 // order to reduce the number of conditional branches in the most common cases.
3433 3357 // Beware -- there's a subtle invariant that fetch of the markword
3434 3358 // at [FETCH], below, will never observe a biased encoding (*101b).
3435 3359 // If this invariant is not held we risk exclusion (safety) failure.
3436 3360 if (UseBiasedLocking && !UseOptoBiasInlining) {
3437 3361 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3438 3362 }
3439 3363
3440 3364 masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
3441 3365 masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
3442 3366 masm.jccb (Assembler::notZero, IsInflated) ;
3443 3367
3444 3368 // Attempt stack-locking ...
3445 3369 masm.orptr (tmpReg, 0x1);
3446 3370 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3447 3371 if (os::is_MP()) { masm.lock(); }
3448 3372 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3449 3373 if (_counters != NULL) {
3450 3374 masm.cond_inc32(Assembler::equal,
3451 3375 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3452 3376 }
3453 3377 masm.jccb (Assembler::equal, DONE_LABEL);
3454 3378
3455 3379 // Recursive locking
3456 3380 masm.subptr(tmpReg, rsp);
3457 3381 masm.andptr(tmpReg, 0xFFFFF003 );
3458 3382 masm.movptr(Address(boxReg, 0), tmpReg);
3459 3383 if (_counters != NULL) {
3460 3384 masm.cond_inc32(Assembler::equal,
3461 3385 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3462 3386 }
3463 3387 masm.jmp (DONE_LABEL) ;
3464 3388
3465 3389 masm.bind (IsInflated) ;
3466 3390
3467 3391 // The object is inflated.
3468 3392 //
3469 3393 // TODO-FIXME: eliminate the ugly use of manifest constants:
3470 3394 // Use markOopDesc::monitor_value instead of "2".
3471 3395 // use markOop::unused_mark() instead of "3".
3472 3396 // The tmpReg value is an objectMonitor reference ORed with
3473 3397 // markOopDesc::monitor_value (2). We can either convert tmpReg to an
3474 3398 // objectmonitor pointer by masking off the "2" bit or we can just
3475 3399 // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3476 3400 // field offsets with "-2" to compensate for and annul the low-order tag bit.
3477 3401 //
3478 3402 // I use the latter as it avoids AGI stalls.
3479 3403 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3480 3404 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3481 3405 //
3482 3406 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
3483 3407
3484 3408 // boxReg refers to the on-stack BasicLock in the current frame.
3485 3409 // We'd like to write:
3486 3410 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
3487 3411 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
3488 3412 // additional latency as we have another ST in the store buffer that must drain.
3489 3413
3490 3414 if (EmitSync & 8192) {
3491 3415 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3492 3416 masm.get_thread (scrReg) ;
3493 3417 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3494 3418 masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
3495 3419 if (os::is_MP()) { masm.lock(); }
3496 3420 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3497 3421 } else
3498 3422 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
3499 3423 masm.movptr(scrReg, boxReg) ;
3500 3424 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3501 3425
3502 3426 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3503 3427 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3504 3428 // prefetchw [eax + Offset(_owner)-2]
3505 3429 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3506 3430 }
3507 3431
3508 3432 if ((EmitSync & 64) == 0) {
3509 3433 // Optimistic form: consider XORL tmpReg,tmpReg
3510 3434 masm.movptr(tmpReg, NULL_WORD) ;
3511 3435 } else {
3512 3436 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3513 3437 // Test-And-CAS instead of CAS
3514 3438 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3515 3439 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3516 3440 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3517 3441 }
3518 3442
3519 3443 // Appears unlocked - try to swing _owner from null to non-null.
3520 3444 // Ideally, I'd manifest "Self" with get_thread and then attempt
3521 3445 // to CAS the register containing Self into m->Owner.
3522 3446 // But we don't have enough registers, so instead we can either try to CAS
3523 3447 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
3524 3448 // we later store "Self" into m->Owner. Transiently storing a stack address
3525 3449 // (rsp or the address of the box) into m->owner is harmless.
3526 3450 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3527 3451 if (os::is_MP()) { masm.lock(); }
3528 3452 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3529 3453 masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
3530 3454 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3531 3455 masm.get_thread (scrReg) ; // beware: clobbers ICCs
3532 3456 masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
3533 3457 masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
3534 3458
3535 3459 // If the CAS fails we can either retry or pass control to the slow-path.
3536 3460 // We use the latter tactic.
3537 3461 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3538 3462 // If the CAS was successful ...
3539 3463 // Self has acquired the lock
3540 3464 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3541 3465 // Intentional fall-through into DONE_LABEL ...
3542 3466 } else {
3543 3467 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3544 3468 masm.movptr(boxReg, tmpReg) ;
3545 3469
3546 3470 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3547 3471 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3548 3472 // prefetchw [eax + Offset(_owner)-2]
3549 3473 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3550 3474 }
3551 3475
3552 3476 if ((EmitSync & 64) == 0) {
3553 3477 // Optimistic form
3554 3478 masm.xorptr (tmpReg, tmpReg) ;
3555 3479 } else {
3556 3480 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3557 3481 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3558 3482 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3559 3483 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3560 3484 }
3561 3485
3562 3486 // Appears unlocked - try to swing _owner from null to non-null.
3563 3487 // Use either "Self" (in scr) or rsp as thread identity in _owner.
3564 3488 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3565 3489 masm.get_thread (scrReg) ;
3566 3490 if (os::is_MP()) { masm.lock(); }
3567 3491 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3568 3492
3569 3493 // If the CAS fails we can either retry or pass control to the slow-path.
3570 3494 // We use the latter tactic.
3571 3495 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3572 3496 // If the CAS was successful ...
3573 3497 // Self has acquired the lock
3574 3498 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3575 3499 // Intentional fall-through into DONE_LABEL ...
3576 3500 }
3577 3501
3578 3502 // DONE_LABEL is a hot target - we'd really like to place it at the
3579 3503 // start of cache line by padding with NOPs.
3580 3504 // See the AMD and Intel software optimization manuals for the
3581 3505 // most efficient "long" NOP encodings.
3582 3506 // Unfortunately none of our alignment mechanisms suffice.
3583 3507 masm.bind(DONE_LABEL);
3584 3508
3585 3509 // Avoid branch-to-branch on AMD processors
3586 3510 // This appears to be superstition.
3587 3511 if (EmitSync & 32) masm.nop() ;
3588 3512
3589 3513
3590 3514 // At DONE_LABEL the icc ZFlag is set as follows ...
3591 3515 // Fast_Unlock uses the same protocol.
3592 3516 // ZFlag == 1 -> Success
3593 3517 // ZFlag == 0 -> Failure - force control through the slow-path
3594 3518 }
3595 3519 %}
3596 3520
3597 3521 // obj: object to unlock
3598 3522 // box: box address (displaced header location), killed. Must be EAX.
3599 3523 // rbx,: killed tmp; cannot be obj nor box.
3600 3524 //
3601 3525 // Some commentary on balanced locking:
3602 3526 //
3603 3527 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3604 3528 // Methods that don't have provably balanced locking are forced to run in the
3605 3529 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3606 3530 // The interpreter provides two properties:
3607 3531 // I1: At return-time the interpreter automatically and quietly unlocks any
3608 3532 // objects acquired the current activation (frame). Recall that the
3609 3533 // interpreter maintains an on-stack list of locks currently held by
3610 3534 // a frame.
3611 3535 // I2: If a method attempts to unlock an object that is not held by
3612 3536 // the frame the interpreter throws IMSX.
3613 3537 //
3614 3538 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3615 3539 // B() doesn't have provably balanced locking so it runs in the interpreter.
3616 3540 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3617 3541 // is still locked by A().
3618 3542 //
3619 3543 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3620 3544 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3621 3545 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3622 3546 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3623 3547
3624 3548 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3625 3549
3626 3550 Register objReg = as_Register($obj$$reg);
3627 3551 Register boxReg = as_Register($box$$reg);
3628 3552 Register tmpReg = as_Register($tmp$$reg);
3629 3553
3630 3554 guarantee (objReg != boxReg, "") ;
3631 3555 guarantee (objReg != tmpReg, "") ;
3632 3556 guarantee (boxReg != tmpReg, "") ;
3633 3557 guarantee (boxReg == as_Register(EAX_enc), "") ;
3634 3558 MacroAssembler masm(&cbuf);
3635 3559
3636 3560 if (EmitSync & 4) {
3637 3561 // Disable - inhibit all inlining. Force control through the slow-path
3638 3562 masm.cmpptr (rsp, 0) ;
3639 3563 } else
3640 3564 if (EmitSync & 8) {
3641 3565 Label DONE_LABEL ;
3642 3566 if (UseBiasedLocking) {
3643 3567 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3644 3568 }
3645 3569 // classic stack-locking code ...
3646 3570 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3647 3571 masm.testptr(tmpReg, tmpReg) ;
3648 3572 masm.jcc (Assembler::zero, DONE_LABEL) ;
3649 3573 if (os::is_MP()) { masm.lock(); }
3650 3574 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3651 3575 masm.bind(DONE_LABEL);
3652 3576 } else {
3653 3577 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3654 3578
3655 3579 // Critically, the biased locking test must have precedence over
3656 3580 // and appear before the (box->dhw == 0) recursive stack-lock test.
3657 3581 if (UseBiasedLocking && !UseOptoBiasInlining) {
3658 3582 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3659 3583 }
3660 3584
3661 3585 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3662 3586 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3663 3587 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3664 3588
3665 3589 masm.testptr(tmpReg, 0x02) ; // Inflated?
3666 3590 masm.jccb (Assembler::zero, Stacked) ;
3667 3591
3668 3592 masm.bind (Inflated) ;
3669 3593 // It's inflated.
3670 3594 // Despite our balanced locking property we still check that m->_owner == Self
3671 3595 // as java routines or native JNI code called by this thread might
3672 3596 // have released the lock.
3673 3597 // Refer to the comments in synchronizer.cpp for how we might encode extra
3674 3598 // state in _succ so we can avoid fetching EntryList|cxq.
3675 3599 //
3676 3600 // I'd like to add more cases in fast_lock() and fast_unlock() --
3677 3601 // such as recursive enter and exit -- but we have to be wary of
3678 3602 // I$ bloat, T$ effects and BP$ effects.
3679 3603 //
3680 3604 // If there's no contention try a 1-0 exit. That is, exit without
3681 3605 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3682 3606 // we detect and recover from the race that the 1-0 exit admits.
3683 3607 //
3684 3608 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3685 3609 // before it STs null into _owner, releasing the lock. Updates
3686 3610 // to data protected by the critical section must be visible before
3687 3611 // we drop the lock (and thus before any other thread could acquire
3688 3612 // the lock and observe the fields protected by the lock).
3689 3613 // IA32's memory-model is SPO, so STs are ordered with respect to
3690 3614 // each other and there's no need for an explicit barrier (fence).
3691 3615 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3692 3616
3693 3617 masm.get_thread (boxReg) ;
3694 3618 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3695 3619 // prefetchw [ebx + Offset(_owner)-2]
3696 3620 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3697 3621 }
3698 3622
3699 3623 // Note that we could employ various encoding schemes to reduce
3700 3624 // the number of loads below (currently 4) to just 2 or 3.
3701 3625 // Refer to the comments in synchronizer.cpp.
3702 3626 // In practice the chain of fetches doesn't seem to impact performance, however.
3703 3627 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3704 3628 // Attempt to reduce branch density - AMD's branch predictor.
3705 3629 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3706 3630 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3707 3631 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3708 3632 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3709 3633 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3710 3634 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3711 3635 masm.jmpb (DONE_LABEL) ;
3712 3636 } else {
3713 3637 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3714 3638 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3715 3639 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3716 3640 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3717 3641 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3718 3642 masm.jccb (Assembler::notZero, CheckSucc) ;
3719 3643 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3720 3644 masm.jmpb (DONE_LABEL) ;
3721 3645 }
3722 3646
3723 3647 // The Following code fragment (EmitSync & 65536) improves the performance of
3724 3648 // contended applications and contended synchronization microbenchmarks.
3725 3649 // Unfortunately the emission of the code - even though not executed - causes regressions
3726 3650 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3727 3651 // with an equal number of never-executed NOPs results in the same regression.
3728 3652 // We leave it off by default.
3729 3653
3730 3654 if ((EmitSync & 65536) != 0) {
3731 3655 Label LSuccess, LGoSlowPath ;
3732 3656
3733 3657 masm.bind (CheckSucc) ;
3734 3658
3735 3659 // Optional pre-test ... it's safe to elide this
3736 3660 if ((EmitSync & 16) == 0) {
3737 3661 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3738 3662 masm.jccb (Assembler::zero, LGoSlowPath) ;
3739 3663 }
3740 3664
3741 3665 // We have a classic Dekker-style idiom:
3742 3666 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3743 3667 // There are a number of ways to implement the barrier:
3744 3668 // (1) lock:andl &m->_owner, 0
3745 3669 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3746 3670 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3747 3671 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3748 3672 // (2) If supported, an explicit MFENCE is appealing.
3749 3673 // In older IA32 processors MFENCE is slower than lock:add or xchg
3750 3674 // particularly if the write-buffer is full as might be the case if
3751 3675 // if stores closely precede the fence or fence-equivalent instruction.
3752 3676 // In more modern implementations MFENCE appears faster, however.
3753 3677 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3754 3678 // The $lines underlying the top-of-stack should be in M-state.
3755 3679 // The locked add instruction is serializing, of course.
3756 3680 // (4) Use xchg, which is serializing
3757 3681 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3758 3682 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3759 3683 // The integer condition codes will tell us if succ was 0.
3760 3684 // Since _succ and _owner should reside in the same $line and
3761 3685 // we just stored into _owner, it's likely that the $line
3762 3686 // remains in M-state for the lock:orl.
3763 3687 //
3764 3688 // We currently use (3), although it's likely that switching to (2)
3765 3689 // is correct for the future.
3766 3690
3767 3691 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3768 3692 if (os::is_MP()) {
3769 3693 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3770 3694 masm.mfence();
3771 3695 } else {
3772 3696 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3773 3697 }
3774 3698 }
3775 3699 // Ratify _succ remains non-null
3776 3700 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3777 3701 masm.jccb (Assembler::notZero, LSuccess) ;
3778 3702
3779 3703 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3780 3704 if (os::is_MP()) { masm.lock(); }
3781 3705 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3782 3706 masm.jccb (Assembler::notEqual, LSuccess) ;
3783 3707 // Since we're low on registers we installed rsp as a placeholding in _owner.
3784 3708 // Now install Self over rsp. This is safe as we're transitioning from
3785 3709 // non-null to non=null
3786 3710 masm.get_thread (boxReg) ;
3787 3711 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3788 3712 // Intentional fall-through into LGoSlowPath ...
3789 3713
3790 3714 masm.bind (LGoSlowPath) ;
3791 3715 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3792 3716 masm.jmpb (DONE_LABEL) ;
3793 3717
3794 3718 masm.bind (LSuccess) ;
3795 3719 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3796 3720 masm.jmpb (DONE_LABEL) ;
3797 3721 }
3798 3722
3799 3723 masm.bind (Stacked) ;
3800 3724 // It's not inflated and it's not recursively stack-locked and it's not biased.
3801 3725 // It must be stack-locked.
3802 3726 // Try to reset the header to displaced header.
3803 3727 // The "box" value on the stack is stable, so we can reload
3804 3728 // and be assured we observe the same value as above.
3805 3729 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3806 3730 if (os::is_MP()) { masm.lock(); }
3807 3731 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3808 3732 // Intention fall-thru into DONE_LABEL
3809 3733
3810 3734
3811 3735 // DONE_LABEL is a hot target - we'd really like to place it at the
3812 3736 // start of cache line by padding with NOPs.
3813 3737 // See the AMD and Intel software optimization manuals for the
3814 3738 // most efficient "long" NOP encodings.
3815 3739 // Unfortunately none of our alignment mechanisms suffice.
3816 3740 if ((EmitSync & 65536) == 0) {
3817 3741 masm.bind (CheckSucc) ;
3818 3742 }
3819 3743 masm.bind(DONE_LABEL);
3820 3744
3821 3745 // Avoid branch to branch on AMD processors
3822 3746 if (EmitSync & 32768) { masm.nop() ; }
3823 3747 }
3824 3748 %}
3825 3749
3826 3750
3827 3751 enc_class enc_pop_rdx() %{
  // Emit a single-byte POP EDX (0x5A); used to discard or reload the rdx slot.
3828 3752 emit_opcode(cbuf,0x5A);
3829 3753 %}
3830 3754
3831 3755 enc_class enc_rethrow() %{
  // Tail-jump to the shared rethrow stub. The displacement is PC-relative,
  // hence the "-4" to account for the 4-byte disp field of the JMP itself.
3832 3756 cbuf.set_insts_mark();
3833 3757 emit_opcode(cbuf, 0xE9); // jmp entry
3834 3758 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3835 3759 runtime_call_Relocation::spec(), RELOC_IMM32 );
3836 3760 %}
3837 3761
3838 3762
3839 3763 // Convert a double to an int. Java semantics require we do complex
3840 3764 // manipulations in the corner cases. So we set the rounding mode to
3841 3765 // 'zero', store the darned double down as an int, and reset the
3842 3766 // rounding mode to 'nearest'. The hardware throws an exception which
3843 3767 // patches up the correct value directly to the stack.
3844 3768 enc_class D2I_encoding( regD src ) %{
3845 3769 // Flip to round-to-zero mode. We attempted to allow invalid-op
3846 3770 // exceptions here, so that a NAN or other corner-case value will
3847 3771 // throw an exception (but normal values get converted at full speed).
3848 3772 // However, I2C adapters and other float-stack manglers leave pending
3849 3773 // invalid-op exceptions hanging. We would have to clear them before
3850 3774 // enabling them and that is more expensive than just testing for the
3851 3775 // invalid value Intel stores down in the corner cases.
3852 3776 emit_opcode(cbuf,0xD9); // FLDCW trunc
3853 3777 emit_opcode(cbuf,0x2D);
3854 3778 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3855 3779 // Allocate a word
3856 3780 emit_opcode(cbuf,0x83); // SUB ESP,4
3857 3781 emit_opcode(cbuf,0xEC);
3858 3782 emit_d8(cbuf,0x04);
3859 3783 // Encoding assumes a double has been pushed into FPR0.
3860 3784 // Store down the double as an int, popping the FPU stack
3861 3785 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3862 3786 emit_opcode(cbuf,0x1C);
3863 3787 emit_d8(cbuf,0x24);
3864 3788 // Restore the rounding mode; mask the exception
3865 3789 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3866 3790 emit_opcode(cbuf,0x2D);
3867 3791 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3868 3792 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3869 3793 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3870 3794
3871 3795 // Load the converted int; adjust CPU stack
3872 3796 emit_opcode(cbuf,0x58); // POP EAX
  // 0x80000000 is the x87 "integer indefinite" value stored for NaN and
  // out-of-range inputs; only then do we take the slow path to get Java
  // semantics (d2i_wrapper).
3873 3797 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3874 3798 emit_d32 (cbuf,0x80000000); // 0x80000000
3875 3799 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x07 = byte length of the slow-path sequence below (FLD = 2, CALL = 5);
  // keep it in sync if the slow path changes.
3876 3800 emit_d8 (cbuf,0x07); // Size of slow_call
3877 3801 // Push src onto stack slow-path
3878 3802 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3879 3803 emit_d8 (cbuf,0xC0-1+$src$$reg );
3880 3804 // CALL directly to the runtime
3881 3805 cbuf.set_insts_mark();
3882 3806 emit_opcode(cbuf,0xE8); // Call into runtime
3883 3807 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3884 3808 // Carry on here...
3885 3809 %}
3886 3810
  // Convert a double (in FPR0) to a long with Java semantics: truncating
  // FISTP to a 64-bit slot, then a slow-path call to d2l_wrapper when the
  // result is the x87 "integer indefinite" 0x8000000000000000 (NaN/overflow).
3887 3811 enc_class D2L_encoding( regD src ) %{
3888 3812 emit_opcode(cbuf,0xD9); // FLDCW trunc
3889 3813 emit_opcode(cbuf,0x2D);
3890 3814 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3891 3815 // Allocate a word
3892 3816 emit_opcode(cbuf,0x83); // SUB ESP,8
3893 3817 emit_opcode(cbuf,0xEC);
3894 3818 emit_d8(cbuf,0x08);
3895 3819 // Encoding assumes a double has been pushed into FPR0.
3896 3820 // Store down the double as a long, popping the FPU stack
3897 3821 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3898 3822 emit_opcode(cbuf,0x3C);
3899 3823 emit_d8(cbuf,0x24);
3900 3824 // Restore the rounding mode; mask the exception
3901 3825 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3902 3826 emit_opcode(cbuf,0x2D);
3903 3827 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3904 3828 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3905 3829 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3906 3830
3907 3831 // Load the converted int; adjust CPU stack
3908 3832 emit_opcode(cbuf,0x58); // POP EAX
3909 3833 emit_opcode(cbuf,0x5A); // POP EDX
  // Slow path only when EDX:EAX == 0x80000000:00000000 (hi == 0x80000000
  // AND lo == 0), i.e. the x87 indefinite value.
3910 3834 emit_opcode(cbuf,0x81); // CMP EDX,imm
3911 3835 emit_d8 (cbuf,0xFA); // rdx
3912 3836 emit_d32 (cbuf,0x80000000); // 0x80000000
3913 3837 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x07+4 = slow path (7) plus the TEST/JNE pair (4) still ahead.
3914 3838 emit_d8 (cbuf,0x07+4); // Size of slow_call
3915 3839 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3916 3840 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3917 3841 emit_opcode(cbuf,0x75); // JNE around_slow_call
3918 3842 emit_d8 (cbuf,0x07); // Size of slow_call
3919 3843 // Push src onto stack slow-path
3920 3844 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3921 3845 emit_d8 (cbuf,0xC0-1+$src$$reg );
3922 3846 // CALL directly to the runtime
3923 3847 cbuf.set_insts_mark();
3924 3848 emit_opcode(cbuf,0xE8); // Call into runtime
3925 3849 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3926 3850 // Carry on here...
3927 3851 %}
3928 3852
  // Convert a float in an XMM register to a long. The value is bounced
  // through memory onto the x87 stack (FLD_S) because SSE has no
  // float->int64 conversion in 32-bit mode; then same truncate/FISTP/
  // slow-path pattern as D2L_encoding.
3929 3853 enc_class X2L_encoding( regX src ) %{
3930 3854 // Allocate a word
3931 3855 emit_opcode(cbuf,0x83); // SUB ESP,8
3932 3856 emit_opcode(cbuf,0xEC);
3933 3857 emit_d8(cbuf,0x08);
3934 3858
3935 3859 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3936 3860 emit_opcode (cbuf, 0x0F );
3937 3861 emit_opcode (cbuf, 0x11 );
3938 3862 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3863
3940 3864 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3941 3865 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3866
3943 3867 emit_opcode(cbuf,0xD9); // FLDCW trunc
3944 3868 emit_opcode(cbuf,0x2D);
3945 3869 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3946 3870
3947 3871 // Encoding assumes a double has been pushed into FPR0.
3948 3872 // Store down the double as a long, popping the FPU stack
3949 3873 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3950 3874 emit_opcode(cbuf,0x3C);
3951 3875 emit_d8(cbuf,0x24);
3952 3876
3953 3877 // Restore the rounding mode; mask the exception
3954 3878 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3955 3879 emit_opcode(cbuf,0x2D);
3956 3880 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3957 3881 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3958 3882 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3959 3883
3960 3884 // Load the converted int; adjust CPU stack
3961 3885 emit_opcode(cbuf,0x58); // POP EAX
3962 3886
3963 3887 emit_opcode(cbuf,0x5A); // POP EDX
3964 3888
  // Slow path only when EDX:EAX == 0x80000000:00000000 (x87 indefinite).
3965 3889 emit_opcode(cbuf,0x81); // CMP EDX,imm
3966 3890 emit_d8 (cbuf,0xFA); // rdx
3967 3891 emit_d32 (cbuf,0x80000000);// 0x80000000
3968 3892
3969 3893 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x13 (19) = byte length of the slow path below; +4 skips the
  // TEST/JNE pair as well. Keep in sync with the emitted bytes.
3970 3894 emit_d8 (cbuf,0x13+4); // Size of slow_call
3971 3895
3972 3896 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3973 3897 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3974 3898
3975 3899 emit_opcode(cbuf,0x75); // JNE around_slow_call
3976 3900 emit_d8 (cbuf,0x13); // Size of slow_call
3977 3901
3978 3902 // Allocate a word
3979 3903 emit_opcode(cbuf,0x83); // SUB ESP,4
3980 3904 emit_opcode(cbuf,0xEC);
3981 3905 emit_d8(cbuf,0x04);
3982 3906
3983 3907 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3984 3908 emit_opcode (cbuf, 0x0F );
3985 3909 emit_opcode (cbuf, 0x11 );
3986 3910 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3987 3911
  // Reload src onto the x87 stack for the runtime wrapper's calling
  // convention, then free the scratch word.
3988 3912 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3989 3913 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3990 3914
3991 3915 emit_opcode(cbuf,0x83); // ADD ESP,4
3992 3916 emit_opcode(cbuf,0xC4);
3993 3917 emit_d8(cbuf,0x04);
3994 3918
3995 3919 // CALL directly to the runtime
3996 3920 cbuf.set_insts_mark();
3997 3921 emit_opcode(cbuf,0xE8); // Call into runtime
3998 3922 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3999 3923 // Carry on here...
4000 3924 %}
4001 3925
  // Convert a double in an XMM register to a long. Same scheme as
  // X2L_encoding but with MOVSD/FLD_D and an 8-byte scratch slot.
4002 3926 enc_class XD2L_encoding( regXD src ) %{
4003 3927 // Allocate a word
4004 3928 emit_opcode(cbuf,0x83); // SUB ESP,8
4005 3929 emit_opcode(cbuf,0xEC);
4006 3930 emit_d8(cbuf,0x08);
4007 3931
4008 3932 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4009 3933 emit_opcode (cbuf, 0x0F );
4010 3934 emit_opcode (cbuf, 0x11 );
4011 3935 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4012 3936
4013 3937 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4014 3938 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4015 3939
4016 3940 emit_opcode(cbuf,0xD9); // FLDCW trunc
4017 3941 emit_opcode(cbuf,0x2D);
4018 3942 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4019 3943
4020 3944 // Encoding assumes a double has been pushed into FPR0.
4021 3945 // Store down the double as a long, popping the FPU stack
4022 3946 emit_opcode(cbuf,0xDF); // FISTP [ESP]
4023 3947 emit_opcode(cbuf,0x3C);
4024 3948 emit_d8(cbuf,0x24);
4025 3949
4026 3950 // Restore the rounding mode; mask the exception
4027 3951 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
4028 3952 emit_opcode(cbuf,0x2D);
4029 3953 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4030 3954 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4031 3955 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4032 3956
4033 3957 // Load the converted int; adjust CPU stack
4034 3958 emit_opcode(cbuf,0x58); // POP EAX
4035 3959
4036 3960 emit_opcode(cbuf,0x5A); // POP EDX
4037 3961
  // Slow path only when EDX:EAX == 0x80000000:00000000 (x87 indefinite).
4038 3962 emit_opcode(cbuf,0x81); // CMP EDX,imm
4039 3963 emit_d8 (cbuf,0xFA); // rdx
4040 3964 emit_d32 (cbuf,0x80000000); // 0x80000000
4041 3965
4042 3966 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x13 (19) = byte length of the slow path below; +4 also skips the
  // TEST/JNE pair. Keep in sync with the emitted bytes.
4043 3967 emit_d8 (cbuf,0x13+4); // Size of slow_call
4044 3968
4045 3969 emit_opcode(cbuf,0x85); // TEST EAX,EAX
4046 3970 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
4047 3971
4048 3972 emit_opcode(cbuf,0x75); // JNE around_slow_call
4049 3973 emit_d8 (cbuf,0x13); // Size of slow_call
4050 3974
4051 3975 // Push src onto stack slow-path
4052 3976 // Allocate a word
4053 3977 emit_opcode(cbuf,0x83); // SUB ESP,8
4054 3978 emit_opcode(cbuf,0xEC);
4055 3979 emit_d8(cbuf,0x08);
4056 3980
4057 3981 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4058 3982 emit_opcode (cbuf, 0x0F );
4059 3983 emit_opcode (cbuf, 0x11 );
4060 3984 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4061 3985
4062 3986 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4063 3987 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4064 3988
4065 3989 emit_opcode(cbuf,0x83); // ADD ESP,8
4066 3990 emit_opcode(cbuf,0xC4);
4067 3991 emit_d8(cbuf,0x08);
4068 3992
4069 3993 // CALL directly to the runtime
4070 3994 cbuf.set_insts_mark();
4071 3995 emit_opcode(cbuf,0xE8); // Call into runtime
4072 3996 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4073 3997 // Carry on here...
4074 3998 %}
4075 3999
  // Move a float from the x87 stack (regD src) into an XMM register (regX
  // dst) via a 4-byte stack scratch slot: FST(P)_S [ESP]; MOVSS dst,[ESP].
  // If src is already FPR1 (top-of-stack) it is popped directly (FSTP);
  // otherwise it is first duplicated to the top with FLD ST(i).
4076 4000 enc_class D2X_encoding( regX dst, regD src ) %{
4077 4001 // Allocate a word
4078 4002 emit_opcode(cbuf,0x83); // SUB ESP,4
4079 4003 emit_opcode(cbuf,0xEC);
4080 4004 emit_d8(cbuf,0x04);
  // pop = /2 (FST) vs /3 (FSTP): pop only the copy we pushed ourselves.
4081 4005 int pop = 0x02;
4082 4006 if ($src$$reg != FPR1L_enc) {
4083 4007 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4084 4008 emit_d8( cbuf, 0xC0-1+$src$$reg );
4085 4009 pop = 0x03;
4086 4010 }
4087 4011 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4088 4012
4089 4013 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4090 4014 emit_opcode (cbuf, 0x0F );
4091 4015 emit_opcode (cbuf, 0x10 );
4092 4016 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4093 4017
4094 4018 emit_opcode(cbuf,0x83); // ADD ESP,4
4095 4019 emit_opcode(cbuf,0xC4);
4096 4020 emit_d8(cbuf,0x04);
4097 4021 // Carry on here...
4098 4022 %}
4099 4023
  // Finish an XMM float/double -> int conversion: the truncating CVTT
  // opcode bytes are emitted by the instruction ($primary selects the
  // double variant); this encoding emits the ModRM plus the 0x80000000
  // sentinel check and the d2i_wrapper slow path for NaN/overflow.
4100 4024 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4101 4025 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4102 4026
4103 4027 // Compare the result to see if we need to go to the slow path
4104 4028 emit_opcode(cbuf,0x81); // CMP dst,imm
4105 4029 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4106 4030 emit_d32 (cbuf,0x80000000); // 0x80000000
4107 4031
4108 4032 emit_opcode(cbuf,0x75); // JNE around_slow_call
4109 4033 emit_d8 (cbuf,0x13); // Size of slow_call
4110 4034 // Store xmm to a temp memory
4111 4035 // location and push it onto stack.
4112 4036
4113 4037 emit_opcode(cbuf,0x83); // SUB ESP,4 (8 when $primary, i.e. double)
4114 4038 emit_opcode(cbuf,0xEC);
4115 4039 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4116 4040
4117 4041 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSD/MOVSS [ESP], xmm
4118 4042 emit_opcode (cbuf, 0x0F );
4119 4043 emit_opcode (cbuf, 0x11 );
4120 4044 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4121 4045
4122 4046 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD_D/FLD_S [ESP]
4123 4047 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4124 4048
4125 4049 emit_opcode(cbuf,0x83); // ADD ESP,4 (8 when $primary, i.e. double)
4126 4050 emit_opcode(cbuf,0xC4);
4127 4051 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4128 4052
4129 4053 // CALL directly to the runtime
4130 4054 cbuf.set_insts_mark();
4131 4055 emit_opcode(cbuf,0xE8); // Call into runtime
4132 4056 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4133 4057
4134 4058 // Carry on here...
4135 4059 %}
4136 4060
  // Move a float from an XMM register onto the x87 stack via a 4-byte
  // stack scratch slot: MOVSS [ESP],src; FLD_S [ESP]. The regD dst is
  // implicitly the new top-of-stack.
4137 4061 enc_class X2D_encoding( regD dst, regX src ) %{
4138 4062 // Allocate a word
4139 4063 emit_opcode(cbuf,0x83); // SUB ESP,4
4140 4064 emit_opcode(cbuf,0xEC);
4141 4065 emit_d8(cbuf,0x04);
4142 4066
4143 4067 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4144 4068 emit_opcode (cbuf, 0x0F );
4145 4069 emit_opcode (cbuf, 0x11 );
4146 4070 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4147 4071
4148 4072 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4149 4073 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4150 4074
4151 4075 emit_opcode(cbuf,0x83); // ADD ESP,4
4152 4076 emit_opcode(cbuf,0xC4);
4153 4077 emit_d8(cbuf,0x04);
4154 4078
4155 4079 // Carry on here...
4156 4080 %}
4157 4081
  // Absolute value of a float in XMM: clear the sign bit by ANDing with
  // the float_signmask_pool constant (absolute address, disp32 operand).
4158 4082 enc_class AbsXF_encoding(regX dst) %{
4159 4083 address signmask_address=(address)float_signmask_pool;
4160 4084 // ANDPS $dst,[signconst]
4161 4085 emit_opcode(cbuf, 0x0F);
4162 4086 emit_opcode(cbuf, 0x54);
4163 4087 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4164 4088 emit_d32(cbuf, (int)signmask_address);
4165 4089 %}
4166 4090
  // Absolute value of a double in XMM: clear the sign bit by ANDing with
  // the double_signmask_pool constant (absolute address, disp32 operand).
4167 4091 enc_class AbsXD_encoding(regXD dst) %{
4168 4092 address signmask_address=(address)double_signmask_pool;
4169 4093 // ANDPD $dst,[signconst]
4170 4094 emit_opcode(cbuf, 0x66);
4171 4095 emit_opcode(cbuf, 0x0F);
4172 4096 emit_opcode(cbuf, 0x54);
4173 4097 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4174 4098 emit_d32(cbuf, (int)signmask_address);
4175 4099 %}
4176 4100
  // Negate a float in XMM: flip the sign bit by XORing with the
  // float_signflip_pool constant (absolute address, disp32 operand).
4177 4101 enc_class NegXF_encoding(regX dst) %{
4178 4102 address signmask_address=(address)float_signflip_pool;
4179 4103 // XORPS $dst,[signconst]
4180 4104 emit_opcode(cbuf, 0x0F);
4181 4105 emit_opcode(cbuf, 0x57);
4182 4106 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4183 4107 emit_d32(cbuf, (int)signmask_address);
4184 4108 %}
4185 4109
  // Negate a double in XMM: flip the sign bit by XORing with the
  // double_signflip_pool constant (absolute address, disp32 operand).
4186 4110 enc_class NegXD_encoding(regXD dst) %{
4187 4111 address signmask_address=(address)double_signflip_pool;
4188 4112 // XORPD $dst,[signconst]
4189 4113 emit_opcode(cbuf, 0x66);
4190 4114 emit_opcode(cbuf, 0x0F);
4191 4115 emit_opcode(cbuf, 0x57);
4192 4116 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4193 4117 emit_d32(cbuf, (int)signmask_address);
4194 4118 %}
4195 4119
  // Multiply top-of-stack by x87 register $src1 (result stays on top).
4196 4120 enc_class FMul_ST_reg( eRegF src1 ) %{
4197 4121 // Operand was loaded from memory into fp ST (stack top)
4198 4122 // FMUL ST,$src /* D8 C8+i */
4199 4123 emit_opcode(cbuf, 0xD8);
4200 4124 emit_opcode(cbuf, 0xC8 + $src1$$reg);
4201 4125 %}
4202 4126
  // Add x87 register $src2 to top-of-stack (non-popping form).
4203 4127 enc_class FAdd_ST_reg( eRegF src2 ) %{
4204 4128 // FADD ST,$src2 /* D8 C0+i */ (note: not FADDP, which is DE C0+i)
4205 4129 emit_opcode(cbuf, 0xD8);
4206 4130 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4207 4131 //could use FADDP src2,fpST /* DE C0+i */
4208 4132 %}
4209 4133
  // Add top-of-stack into x87 register $src2 and pop the stack.
4210 4134 enc_class FAddP_reg_ST( eRegF src2 ) %{
4211 4135 // FADDP src2,ST /* DE C0+i */
4212 4136 emit_opcode(cbuf, 0xDE);
4213 4137 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4214 4138 %}
4215 4139
  // Fused subtract-then-divide on the x87 stack:
  // ST = (ST - $src1) / $src2, result left on top.
4216 4140 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4217 4141 // Operand has been loaded into fp ST (stack top)
4218 4142 // FSUB ST,$src1 /* D8 E0+i */
4219 4143 emit_opcode(cbuf, 0xD8);
4220 4144 emit_opcode(cbuf, 0xE0 + $src1$$reg);
4221 4145
4222 4146 // FDIV ST,$src2 /* D8 F0+i */
4223 4147 emit_opcode(cbuf, 0xD8);
4224 4148 emit_opcode(cbuf, 0xF0 + $src2$$reg);
4225 4149 %}
4226 4150
  // Fused add-then-multiply on the x87 stack:
  // ST = (ST + $src1) * $src2, result left on top.
4227 4151 enc_class MulFAddF (eRegF src1, eRegF src2) %{
4228 4152 // Operand was loaded from memory into fp ST (stack top)
4229 4153 // FADD ST,$src /* D8 C0+i */
4230 4154 emit_opcode(cbuf, 0xD8);
4231 4155 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4232 4156
4233 4157 // FMUL ST,src2 /* D8 C8+i */
4234 4158 emit_opcode(cbuf, 0xD8);
4235 4159 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4236 4160 %}
4237 4161
4238 4162
4239 4163 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4240 4164 // Operand was loaded from memory into fp ST (stack top)
4241 4165 // FADD ST,$src /* D8 C0+i */
4242 4166 emit_opcode(cbuf, 0xD8);
4243 4167 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4244 4168
4245 4169 // FMULP src2,ST /* DE C8+i */
4246 4170 emit_opcode(cbuf, 0xDE);
4247 4171 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4248 4172 %}
4249 4173
4250 4174 // Atomically load the volatile long
  // Uses the x87 FPU for a single 64-bit memory access: FILD qword [mem]
  // then FISTP qword to the destination stack slot. FILD/FISTP do not
  // round, so the bit pattern is preserved exactly.
4251 4175 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4252 4176 emit_opcode(cbuf,0xDF); // FILD qword [mem] (DF /5)
4253 4177 int rm_byte_opcode = 0x05;
4254 4178 int base = $mem$$base;
4255 4179 int index = $mem$$index;
4256 4180 int scale = $mem$$scale;
4257 4181 int displace = $mem$$disp;
4258 4182 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4259 4183 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4260 4184 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); // FISTP qword [dst] (DF /7)
4261 4185 %}
4262 4186
  // Atomically load a volatile long using SSE2: one 64-bit XMM load from
  // $mem, one 64-bit XMM store to the destination stack slot.
4263 4187 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4264 4188 { // Atomic long load
4265 4189 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4266 4190 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4267 4191 emit_opcode(cbuf,0x0F);
4268 4192 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4269 4193 int base = $mem$$base;
4270 4194 int index = $mem$$index;
4271 4195 int scale = $mem$$scale;
4272 4196 int displace = $mem$$disp;
4273 4197 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4198 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4199 }
4276 4200 { // MOVSD $dst,$tmp ! atomic long store
4277 4201 emit_opcode(cbuf,0xF2);
4278 4202 emit_opcode(cbuf,0x0F);
4279 4203 emit_opcode(cbuf,0x11);
4280 4204 int base = $dst$$base;
4281 4205 int index = $dst$$index;
4282 4206 int scale = $dst$$scale;
4283 4207 int displace = $dst$$disp;
4284 4208 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4285 4209 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4286 4210 }
4287 4211 %}
4288 4212
  // Atomically load a volatile long into a GPR pair using SSE2: one
  // 64-bit XMM load, then MOVD the low word, shift the XMM right by 32,
  // and MOVD the high word into the paired register.
4289 4213 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4290 4214 { // Atomic long load
4291 4215 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4292 4216 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4293 4217 emit_opcode(cbuf,0x0F);
4294 4218 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4295 4219 int base = $mem$$base;
4296 4220 int index = $mem$$index;
4297 4221 int scale = $mem$$scale;
4298 4222 int displace = $mem$$disp;
4299 4223 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4300 4224 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4301 4225 }
4302 4226 { // MOVD $dst.lo,$tmp
4303 4227 emit_opcode(cbuf,0x66);
4304 4228 emit_opcode(cbuf,0x0F);
4305 4229 emit_opcode(cbuf,0x7E);
4306 4230 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4307 4231 }
4308 4232 { // PSRLQ $tmp,32
4309 4233 emit_opcode(cbuf,0x66);
4310 4234 emit_opcode(cbuf,0x0F);
4311 4235 emit_opcode(cbuf,0x73);
4312 4236 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4313 4237 emit_d8(cbuf, 0x20);
4314 4238 }
4315 4239 { // MOVD $dst.hi,$tmp
4316 4240 emit_opcode(cbuf,0x66);
4317 4241 emit_opcode(cbuf,0x0F);
4318 4242 emit_opcode(cbuf,0x7E);
4319 4243 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4320 4244 }
4321 4245 %}
4322 4246
4323 4247 // Volatile Store Long. Must be atomic, so move it into
4324 4248 // the FP TOS and then do a 64-bit FIST. Has to probe the
4325 4249 // target address before the store (for null-ptr checks)
4326 4250 // so the memory operand is used twice in the encoding.
4327 4251 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  // FILD qword from the source stack slot (DF /5), then FISTP qword to
  // $mem (DF /7) — a single atomic 64-bit store via the FPU.
4328 4252 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
4329 4253 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
4330 4254 emit_opcode(cbuf,0xDF);
4331 4255 int rm_byte_opcode = 0x07;
4332 4256 int base = $mem$$base;
4333 4257 int index = $mem$$index;
4334 4258 int scale = $mem$$scale;
4335 4259 int displace = $mem$$disp;
4336 4260 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4337 4261 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4338 4262 %}
4339 4263
  // Atomically store a volatile long using SSE2: one 64-bit XMM load from
  // the source stack slot, one 64-bit XMM (MOVSD) store to $mem.
4340 4264 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4341 4265 { // Atomic long load
4342 4266 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4343 4267 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4344 4268 emit_opcode(cbuf,0x0F);
4345 4269 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4346 4270 int base = $src$$base;
4347 4271 int index = $src$$index;
4348 4272 int scale = $src$$scale;
4349 4273 int displace = $src$$disp;
4350 4274 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4351 4275 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4352 4276 }
4353 4277 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4354 4278 { // MOVSD $mem,$tmp ! atomic long store
4355 4279 emit_opcode(cbuf,0xF2);
4356 4280 emit_opcode(cbuf,0x0F);
4357 4281 emit_opcode(cbuf,0x11);
4358 4282 int base = $mem$$base;
4359 4283 int index = $mem$$index;
4360 4284 int scale = $mem$$scale;
4361 4285 int displace = $mem$$disp;
4362 4286 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4363 4287 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4364 4288 }
4365 4289 %}
4366 4290
  // Atomically store a volatile long from a GPR pair using SSE2: MOVD the
  // low and high halves into two XMM temps, merge them with PUNPCKLDQ,
  // then do a single 64-bit MOVSD store to $mem.
4367 4291 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4368 4292 { // MOVD $tmp,$src.lo
4369 4293 emit_opcode(cbuf,0x66);
4370 4294 emit_opcode(cbuf,0x0F);
4371 4295 emit_opcode(cbuf,0x6E);
4372 4296 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4373 4297 }
4374 4298 { // MOVD $tmp2,$src.hi
4375 4299 emit_opcode(cbuf,0x66);
4376 4300 emit_opcode(cbuf,0x0F);
4377 4301 emit_opcode(cbuf,0x6E);
4378 4302 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4379 4303 }
4380 4304 { // PUNPCKLDQ $tmp,$tmp2
4381 4305 emit_opcode(cbuf,0x66);
4382 4306 emit_opcode(cbuf,0x0F);
4383 4307 emit_opcode(cbuf,0x62);
4384 4308 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4385 4309 }
4386 4310 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4387 4311 { // MOVSD $mem,$tmp ! atomic long store
4388 4312 emit_opcode(cbuf,0xF2);
4389 4313 emit_opcode(cbuf,0x0F);
4390 4314 emit_opcode(cbuf,0x11);
4391 4315 int base = $mem$$base;
4392 4316 int index = $mem$$index;
4393 4317 int scale = $mem$$scale;
4394 4318 int displace = $mem$$disp;
4395 4319 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4396 4320 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4397 4321 }
4398 4322 %}
4399 4323
4400 4324 // Safepoint Poll. This polls the safepoint page, and causes an
4401 4325 // exception if it is not readable. Unfortunately, it kills the condition code
4402 4326 // in the process
4403 4327 // We currently use TESTL [spp],EDI
4404 4328 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4405 4329
4406 4330 enc_class Safepoint_Poll() %{
  // Record a poll relocation at the instruction start so the VM can find
  // (and patch/interpret) this poll site.
4407 4331 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  // TEST [polling_page], EDI -- 0x85 with mod=0, reg=EDI, rm=5 (disp32):
  // a read of the polling page; faults when the page is protected.
4408 4332 emit_opcode(cbuf,0x85);
4409 4333 emit_rm (cbuf, 0x0, 0x7, 0x5);
4410 4334 emit_d32(cbuf, (intptr_t)os::get_polling_page());
4411 4335 %}
4412 4336 %}
4413 4337
4414 4338
4415 4339 //----------FRAME--------------------------------------------------------------
4416 4340 // Definition of frame structure and management information.
4417 4341 //
4418 4342 // S T A C K L A Y O U T Allocators stack-slot number
4419 4343 // | (to get allocators register number
4420 4344 // G Owned by | | v add OptoReg::stack0())
4421 4345 // r CALLER | |
4422 4346 // o | +--------+ pad to even-align allocators stack-slot
4423 4347 // w V | pad0 | numbers; owned by CALLER
4424 4348 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4425 4349 // h ^ | in | 5
4426 4350 // | | args | 4 Holes in incoming args owned by SELF
4427 4351 // | | | | 3
4428 4352 // | | +--------+
4429 4353 // V | | old out| Empty on Intel, window on Sparc
4430 4354 // | old |preserve| Must be even aligned.
4431 4355 // | SP-+--------+----> Matcher::_old_SP, even aligned
4432 4356 // | | in | 3 area for Intel ret address
4433 4357 // Owned by |preserve| Empty on Sparc.
4434 4358 // SELF +--------+
4435 4359 // | | pad2 | 2 pad to align old SP
4436 4360 // | +--------+ 1
4437 4361 // | | locks | 0
4438 4362 // | +--------+----> OptoReg::stack0(), even aligned
4439 4363 // | | pad1 | 11 pad to align new SP
4440 4364 // | +--------+
4441 4365 // | | | 10
4442 4366 // | | spills | 9 spills
4443 4367 // V | | 8 (pad0 slot for callee)
4444 4368 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4445 4369 // ^ | out | 7
4446 4370 // | | args | 6 Holes in outgoing args owned by CALLEE
4447 4371 // Owned by +--------+
4448 4372 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4449 4373 // | new |preserve| Must be even-aligned.
4450 4374 // | SP-+--------+----> Matcher::_new_SP, even aligned
4451 4375 // | | |
4452 4376 //
4453 4377 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4454 4378 // known from SELF's arguments and the Java calling convention.
4455 4379 // Region 6-7 is determined per call site.
4456 4380 // Note 2: If the calling convention leaves holes in the incoming argument
4457 4381 // area, those holes are owned by SELF. Holes in the outgoing area
 4458 4382 // are owned by the CALLEE. Holes should not be necessary in the
4459 4383 // incoming area, as the Java calling convention is completely under
4460 4384 // the control of the AD file. Doubles can be sorted and packed to
 4461 4385 // avoid holes. Holes in the outgoing arguments may be necessary for
4462 4386 // varargs C calling conventions.
4463 4387 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4464 4388 // even aligned with pad0 as needed.
4465 4389 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4466 4390 // region 6-11 is even aligned; it may be padded out more so that
4467 4391 // the region from SP to FP meets the minimum stack alignment.
4468 4392
4469 4393 frame %{
4470 4394 // What direction does stack grow in (assumed to be same for C & Java)
4471 4395 stack_direction(TOWARDS_LOW);
4472 4396
4473 4397 // These three registers define part of the calling convention
4474 4398 // between compiled code and the interpreter.
4475 4399 inline_cache_reg(EAX); // Inline Cache Register
4476 4400 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4477 4401
4478 4402 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4479 4403 cisc_spilling_operand_name(indOffset32);
4480 4404
4481 4405 // Number of stack slots consumed by locking an object
4482 4406 sync_stack_slots(1);
4483 4407
4484 4408 // Compiled code's Frame Pointer
4485 4409 frame_pointer(ESP);
4486 4410 // Interpreter stores its frame pointer in a register which is
4487 4411 // stored to the stack by I2CAdaptors.
4488 4412 // I2CAdaptors convert from interpreted java to compiled java.
4489 4413 interpreter_frame_pointer(EBP);
4490 4414
4491 4415 // Stack alignment requirement
4492 4416 // Alignment size in bytes (128-bit -> 16 bytes)
4493 4417 stack_alignment(StackAlignmentInBytes);
4494 4418
4495 4419 // Number of stack slots between incoming argument block and the start of
4496 4420 // a new frame. The PROLOG must add this many slots to the stack. The
4497 4421 // EPILOG must remove this many slots. Intel needs one slot for
4498 4422 // return address and one for rbp, (must save rbp)
4499 4423 in_preserve_stack_slots(2+VerifyStackAtCalls);
4500 4424
4501 4425 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4502 4426 // for calls to C. Supports the var-args backing area for register parms.
4503 4427 varargs_C_out_slots_killed(0);
4504 4428
4505 4429 // The after-PROLOG location of the return address. Location of
4506 4430 // return address specifies a type (REG or STACK) and a number
4507 4431 // representing the register number (i.e. - use a register name) or
4508 4432 // stack slot.
4509 4433 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4510 4434 // Otherwise, it is above the locks and verification slot and alignment word
4511 4435 return_addr(STACK - 1 +
4512 4436 round_to(1+VerifyStackAtCalls+
4513 4437 Compile::current()->fixed_slots(),
4514 4438 (StackAlignmentInBytes/wordSize)));
4515 4439
4516 4440 // Body of function which returns an integer array locating
4517 4441 // arguments either in registers or in stack slots. Passed an array
4518 4442 // of ideal registers called "sig" and a "length" count. Stack-slot
4519 4443 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4520 4444 // arguments for a CALLEE. Incoming stack arguments are
4521 4445 // automatically biased by the preserve_stack_slots field above.
4522 4446 calling_convention %{
 4523 4447 // No difference between incoming/outgoing just pass false
4524 4448 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4525 4449 %}
4526 4450
4527 4451
4528 4452 // Body of function which returns an integer array locating
4529 4453 // arguments either in registers or in stack slots. Passed an array
4530 4454 // of ideal registers called "sig" and a "length" count. Stack-slot
4531 4455 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4532 4456 // arguments for a CALLEE. Incoming stack arguments are
4533 4457 // automatically biased by the preserve_stack_slots field above.
4534 4458 c_calling_convention %{
4535 4459 // This is obviously always outgoing
4536 4460 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4537 4461 %}
4538 4462
4539 4463 // Location of C & interpreter return values
4540 4464 c_return_value %{
4541 4465 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4542 4466 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4543 4467 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4544 4468
4545 4469 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4546 4470 // that C functions return float and double results in XMM0.
4547 4471 if( ideal_reg == Op_RegD && UseSSE>=2 )
4548 4472 return OptoRegPair(XMM0b_num,XMM0a_num);
4549 4473 if( ideal_reg == Op_RegF && UseSSE>=2 )
4550 4474 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4551 4475
4552 4476 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4553 4477 %}
4554 4478
4555 4479 // Location of return values
4556 4480 return_value %{
4557 4481 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4558 4482 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4559 4483 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4560 4484 if( ideal_reg == Op_RegD && UseSSE>=2 )
4561 4485 return OptoRegPair(XMM0b_num,XMM0a_num);
4562 4486 if( ideal_reg == Op_RegF && UseSSE>=1 )
4563 4487 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4564 4488 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4565 4489 %}
4566 4490
4567 4491 %}
4568 4492
4569 4493 //----------ATTRIBUTES---------------------------------------------------------
4570 4494 //----------Operand Attributes-------------------------------------------------
4571 4495 op_attrib op_cost(0); // Required cost attribute
4572 4496
4573 4497 //----------Instruction Attributes---------------------------------------------
4574 4498 ins_attrib ins_cost(100); // Required cost attribute
4575 4499 ins_attrib ins_size(8); // Required size attribute (in bits)
4576 4500 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4577 4501 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4578 4502 // non-matching short branch variant of some
4579 4503 // long branch?
4580 4504 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4581 4505 // specifies the alignment that some part of the instruction (not
4582 4506 // necessarily the start) requires. If > 1, a compute_padding()
4583 4507 // function must be provided for the instruction
4584 4508
4585 4509 //----------OPERANDS-----------------------------------------------------------
4586 4510 // Operand definitions must precede instruction definitions for correct parsing
4587 4511 // in the ADLC because operands constitute user defined types which are used in
4588 4512 // instruction definitions.
4589 4513
4590 4514 //----------Simple Operands----------------------------------------------------
4591 4515 // Immediate Operands
4592 4516 // Integer Immediate
4593 4517 operand immI() %{
4594 4518 match(ConI);
4595 4519
4596 4520 op_cost(10);
4597 4521 format %{ %}
4598 4522 interface(CONST_INTER);
4599 4523 %}
4600 4524
4601 4525 // Constant for test vs zero
4602 4526 operand immI0() %{
4603 4527 predicate(n->get_int() == 0);
4604 4528 match(ConI);
4605 4529
4606 4530 op_cost(0);
4607 4531 format %{ %}
4608 4532 interface(CONST_INTER);
4609 4533 %}
4610 4534
4611 4535 // Constant for increment
4612 4536 operand immI1() %{
4613 4537 predicate(n->get_int() == 1);
4614 4538 match(ConI);
4615 4539
4616 4540 op_cost(0);
4617 4541 format %{ %}
4618 4542 interface(CONST_INTER);
4619 4543 %}
4620 4544
4621 4545 // Constant for decrement
4622 4546 operand immI_M1() %{
4623 4547 predicate(n->get_int() == -1);
4624 4548 match(ConI);
4625 4549
4626 4550 op_cost(0);
4627 4551 format %{ %}
4628 4552 interface(CONST_INTER);
4629 4553 %}
4630 4554
4631 4555 // Valid scale values for addressing modes
4632 4556 operand immI2() %{
4633 4557 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4634 4558 match(ConI);
4635 4559
4636 4560 format %{ %}
4637 4561 interface(CONST_INTER);
4638 4562 %}
4639 4563
4640 4564 operand immI8() %{
4641 4565 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4642 4566 match(ConI);
4643 4567
4644 4568 op_cost(5);
4645 4569 format %{ %}
4646 4570 interface(CONST_INTER);
4647 4571 %}
4648 4572
4649 4573 operand immI16() %{
4650 4574 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4651 4575 match(ConI);
4652 4576
4653 4577 op_cost(10);
4654 4578 format %{ %}
4655 4579 interface(CONST_INTER);
4656 4580 %}
4657 4581
4658 4582 // Constant for long shifts
4659 4583 operand immI_32() %{
4660 4584 predicate( n->get_int() == 32 );
4661 4585 match(ConI);
4662 4586
4663 4587 op_cost(0);
4664 4588 format %{ %}
4665 4589 interface(CONST_INTER);
4666 4590 %}
4667 4591
4668 4592 operand immI_1_31() %{
4669 4593 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4670 4594 match(ConI);
4671 4595
4672 4596 op_cost(0);
4673 4597 format %{ %}
4674 4598 interface(CONST_INTER);
4675 4599 %}
4676 4600
4677 4601 operand immI_32_63() %{
4678 4602 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4679 4603 match(ConI);
4680 4604 op_cost(0);
4681 4605
4682 4606 format %{ %}
4683 4607 interface(CONST_INTER);
4684 4608 %}
4685 4609
4686 4610 operand immI_1() %{
4687 4611 predicate( n->get_int() == 1 );
4688 4612 match(ConI);
4689 4613
4690 4614 op_cost(0);
4691 4615 format %{ %}
4692 4616 interface(CONST_INTER);
4693 4617 %}
4694 4618
4695 4619 operand immI_2() %{
4696 4620 predicate( n->get_int() == 2 );
4697 4621 match(ConI);
4698 4622
4699 4623 op_cost(0);
4700 4624 format %{ %}
4701 4625 interface(CONST_INTER);
4702 4626 %}
4703 4627
4704 4628 operand immI_3() %{
4705 4629 predicate( n->get_int() == 3 );
4706 4630 match(ConI);
4707 4631
4708 4632 op_cost(0);
4709 4633 format %{ %}
4710 4634 interface(CONST_INTER);
4711 4635 %}
4712 4636
4713 4637 // Pointer Immediate
4714 4638 operand immP() %{
4715 4639 match(ConP);
4716 4640
4717 4641 op_cost(10);
4718 4642 format %{ %}
4719 4643 interface(CONST_INTER);
4720 4644 %}
4721 4645
4722 4646 // NULL Pointer Immediate
4723 4647 operand immP0() %{
4724 4648 predicate( n->get_ptr() == 0 );
4725 4649 match(ConP);
4726 4650 op_cost(0);
4727 4651
4728 4652 format %{ %}
4729 4653 interface(CONST_INTER);
4730 4654 %}
4731 4655
4732 4656 // Long Immediate
4733 4657 operand immL() %{
4734 4658 match(ConL);
4735 4659
4736 4660 op_cost(20);
4737 4661 format %{ %}
4738 4662 interface(CONST_INTER);
4739 4663 %}
4740 4664
4741 4665 // Long Immediate zero
4742 4666 operand immL0() %{
4743 4667 predicate( n->get_long() == 0L );
4744 4668 match(ConL);
4745 4669 op_cost(0);
4746 4670
4747 4671 format %{ %}
4748 4672 interface(CONST_INTER);
4749 4673 %}
4750 4674
 4751 4675 // Long Immediate -1
4752 4676 operand immL_M1() %{
4753 4677 predicate( n->get_long() == -1L );
4754 4678 match(ConL);
4755 4679 op_cost(0);
4756 4680
4757 4681 format %{ %}
4758 4682 interface(CONST_INTER);
4759 4683 %}
4760 4684
4761 4685 // Long immediate from 0 to 127.
4762 4686 // Used for a shorter form of long mul by 10.
4763 4687 operand immL_127() %{
4764 4688 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4765 4689 match(ConL);
4766 4690 op_cost(0);
4767 4691
4768 4692 format %{ %}
4769 4693 interface(CONST_INTER);
4770 4694 %}
4771 4695
4772 4696 // Long Immediate: low 32-bit mask
4773 4697 operand immL_32bits() %{
4774 4698 predicate(n->get_long() == 0xFFFFFFFFL);
4775 4699 match(ConL);
4776 4700 op_cost(0);
4777 4701
4778 4702 format %{ %}
4779 4703 interface(CONST_INTER);
4780 4704 %}
4781 4705
 4782 4706 // Long Immediate: 32-bit signed value (fits in an int)
4783 4707 operand immL32() %{
4784 4708 predicate(n->get_long() == (int)(n->get_long()));
4785 4709 match(ConL);
4786 4710 op_cost(20);
4787 4711
4788 4712 format %{ %}
4789 4713 interface(CONST_INTER);
4790 4714 %}
4791 4715
 4792 4716 // Double Immediate zero
4793 4717 operand immD0() %{
↓ open down ↓ |
2684 lines elided |
↑ open up ↑ |
4794 4718 // Do additional (and counter-intuitive) test against NaN to work around VC++
4795 4719 // bug that generates code such that NaNs compare equal to 0.0
4796 4720 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4797 4721 match(ConD);
4798 4722
4799 4723 op_cost(5);
4800 4724 format %{ %}
4801 4725 interface(CONST_INTER);
4802 4726 %}
4803 4727
4804 -// Double Immediate
4728 +// Double Immediate one
4805 4729 operand immD1() %{
4806 4730 predicate( UseSSE<=1 && n->getd() == 1.0 );
4807 4731 match(ConD);
4808 4732
4809 4733 op_cost(5);
4810 4734 format %{ %}
4811 4735 interface(CONST_INTER);
4812 4736 %}
4813 4737
4814 4738 // Double Immediate
4815 4739 operand immD() %{
4816 4740 predicate(UseSSE<=1);
4817 4741 match(ConD);
4818 4742
4819 4743 op_cost(5);
4820 4744 format %{ %}
4821 4745 interface(CONST_INTER);
4822 4746 %}
4823 4747
4824 4748 operand immXD() %{
4825 4749 predicate(UseSSE>=2);
4826 4750 match(ConD);
4827 4751
4828 4752 op_cost(5);
4829 4753 format %{ %}
4830 4754 interface(CONST_INTER);
4831 4755 %}
4832 4756
4833 4757 // Double Immediate zero
4834 4758 operand immXD0() %{
4835 4759 // Do additional (and counter-intuitive) test against NaN to work around VC++
4836 4760 // bug that generates code such that NaNs compare equal to 0.0 AND do not
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
4837 4761 // compare equal to -0.0.
4838 4762 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4839 4763 match(ConD);
4840 4764
4841 4765 format %{ %}
4842 4766 interface(CONST_INTER);
4843 4767 %}
4844 4768
4845 4769 // Float Immediate zero
4846 4770 operand immF0() %{
4847 - predicate( UseSSE == 0 && n->getf() == 0.0 );
4771 + predicate(UseSSE == 0 && n->getf() == 0.0F);
4772 + match(ConF);
4773 +
4774 + op_cost(5);
4775 + format %{ %}
4776 + interface(CONST_INTER);
4777 +%}
4778 +
4779 +// Float Immediate one
4780 +operand immF1() %{
4781 + predicate(UseSSE == 0 && n->getf() == 1.0F);
4848 4782 match(ConF);
4849 4783
4850 4784 op_cost(5);
4851 4785 format %{ %}
4852 4786 interface(CONST_INTER);
4853 4787 %}
4854 4788
4855 4789 // Float Immediate
4856 4790 operand immF() %{
4857 4791 predicate( UseSSE == 0 );
4858 4792 match(ConF);
4859 4793
4860 4794 op_cost(5);
4861 4795 format %{ %}
4862 4796 interface(CONST_INTER);
4863 4797 %}
4864 4798
4865 4799 // Float Immediate
4866 4800 operand immXF() %{
4867 4801 predicate(UseSSE >= 1);
4868 4802 match(ConF);
4869 4803
4870 4804 op_cost(5);
4871 4805 format %{ %}
4872 4806 interface(CONST_INTER);
4873 4807 %}
4874 4808
4875 4809 // Float Immediate zero. Zero and not -0.0
4876 4810 operand immXF0() %{
4877 4811 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4878 4812 match(ConF);
4879 4813
4880 4814 op_cost(5);
4881 4815 format %{ %}
4882 4816 interface(CONST_INTER);
4883 4817 %}
4884 4818
4885 4819 // Immediates for special shifts (sign extend)
4886 4820
4887 4821 // Constants for increment
4888 4822 operand immI_16() %{
4889 4823 predicate( n->get_int() == 16 );
4890 4824 match(ConI);
4891 4825
4892 4826 format %{ %}
4893 4827 interface(CONST_INTER);
4894 4828 %}
4895 4829
4896 4830 operand immI_24() %{
4897 4831 predicate( n->get_int() == 24 );
4898 4832 match(ConI);
4899 4833
4900 4834 format %{ %}
4901 4835 interface(CONST_INTER);
4902 4836 %}
4903 4837
4904 4838 // Constant for byte-wide masking
4905 4839 operand immI_255() %{
4906 4840 predicate( n->get_int() == 255 );
4907 4841 match(ConI);
4908 4842
4909 4843 format %{ %}
4910 4844 interface(CONST_INTER);
4911 4845 %}
4912 4846
4913 4847 // Constant for short-wide masking
4914 4848 operand immI_65535() %{
4915 4849 predicate(n->get_int() == 65535);
4916 4850 match(ConI);
4917 4851
4918 4852 format %{ %}
4919 4853 interface(CONST_INTER);
4920 4854 %}
4921 4855
4922 4856 // Register Operands
4923 4857 // Integer Register
4924 4858 operand eRegI() %{
4925 4859 constraint(ALLOC_IN_RC(e_reg));
4926 4860 match(RegI);
4927 4861 match(xRegI);
4928 4862 match(eAXRegI);
4929 4863 match(eBXRegI);
4930 4864 match(eCXRegI);
4931 4865 match(eDXRegI);
4932 4866 match(eDIRegI);
4933 4867 match(eSIRegI);
4934 4868
4935 4869 format %{ %}
4936 4870 interface(REG_INTER);
4937 4871 %}
4938 4872
4939 4873 // Subset of Integer Register
4940 4874 operand xRegI(eRegI reg) %{
4941 4875 constraint(ALLOC_IN_RC(x_reg));
4942 4876 match(reg);
4943 4877 match(eAXRegI);
4944 4878 match(eBXRegI);
4945 4879 match(eCXRegI);
4946 4880 match(eDXRegI);
4947 4881
4948 4882 format %{ %}
4949 4883 interface(REG_INTER);
4950 4884 %}
4951 4885
4952 4886 // Special Registers
4953 4887 operand eAXRegI(xRegI reg) %{
4954 4888 constraint(ALLOC_IN_RC(eax_reg));
4955 4889 match(reg);
4956 4890 match(eRegI);
4957 4891
4958 4892 format %{ "EAX" %}
4959 4893 interface(REG_INTER);
4960 4894 %}
4961 4895
4962 4896 // Special Registers
4963 4897 operand eBXRegI(xRegI reg) %{
4964 4898 constraint(ALLOC_IN_RC(ebx_reg));
4965 4899 match(reg);
4966 4900 match(eRegI);
4967 4901
4968 4902 format %{ "EBX" %}
4969 4903 interface(REG_INTER);
4970 4904 %}
4971 4905
4972 4906 operand eCXRegI(xRegI reg) %{
4973 4907 constraint(ALLOC_IN_RC(ecx_reg));
4974 4908 match(reg);
4975 4909 match(eRegI);
4976 4910
4977 4911 format %{ "ECX" %}
4978 4912 interface(REG_INTER);
4979 4913 %}
4980 4914
4981 4915 operand eDXRegI(xRegI reg) %{
4982 4916 constraint(ALLOC_IN_RC(edx_reg));
4983 4917 match(reg);
4984 4918 match(eRegI);
4985 4919
4986 4920 format %{ "EDX" %}
4987 4921 interface(REG_INTER);
4988 4922 %}
4989 4923
4990 4924 operand eDIRegI(xRegI reg) %{
4991 4925 constraint(ALLOC_IN_RC(edi_reg));
4992 4926 match(reg);
4993 4927 match(eRegI);
4994 4928
4995 4929 format %{ "EDI" %}
4996 4930 interface(REG_INTER);
4997 4931 %}
4998 4932
4999 4933 operand naxRegI() %{
5000 4934 constraint(ALLOC_IN_RC(nax_reg));
5001 4935 match(RegI);
5002 4936 match(eCXRegI);
5003 4937 match(eDXRegI);
5004 4938 match(eSIRegI);
5005 4939 match(eDIRegI);
5006 4940
5007 4941 format %{ %}
5008 4942 interface(REG_INTER);
5009 4943 %}
5010 4944
5011 4945 operand nadxRegI() %{
5012 4946 constraint(ALLOC_IN_RC(nadx_reg));
5013 4947 match(RegI);
5014 4948 match(eBXRegI);
5015 4949 match(eCXRegI);
5016 4950 match(eSIRegI);
5017 4951 match(eDIRegI);
5018 4952
5019 4953 format %{ %}
5020 4954 interface(REG_INTER);
5021 4955 %}
5022 4956
5023 4957 operand ncxRegI() %{
5024 4958 constraint(ALLOC_IN_RC(ncx_reg));
5025 4959 match(RegI);
5026 4960 match(eAXRegI);
5027 4961 match(eDXRegI);
5028 4962 match(eSIRegI);
5029 4963 match(eDIRegI);
5030 4964
5031 4965 format %{ %}
5032 4966 interface(REG_INTER);
5033 4967 %}
5034 4968
5035 4969 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
5036 4970 // //
5037 4971 operand eSIRegI(xRegI reg) %{
5038 4972 constraint(ALLOC_IN_RC(esi_reg));
5039 4973 match(reg);
5040 4974 match(eRegI);
5041 4975
5042 4976 format %{ "ESI" %}
5043 4977 interface(REG_INTER);
5044 4978 %}
5045 4979
5046 4980 // Pointer Register
5047 4981 operand anyRegP() %{
5048 4982 constraint(ALLOC_IN_RC(any_reg));
5049 4983 match(RegP);
5050 4984 match(eAXRegP);
5051 4985 match(eBXRegP);
5052 4986 match(eCXRegP);
5053 4987 match(eDIRegP);
5054 4988 match(eRegP);
5055 4989
5056 4990 format %{ %}
5057 4991 interface(REG_INTER);
5058 4992 %}
5059 4993
5060 4994 operand eRegP() %{
5061 4995 constraint(ALLOC_IN_RC(e_reg));
5062 4996 match(RegP);
5063 4997 match(eAXRegP);
5064 4998 match(eBXRegP);
5065 4999 match(eCXRegP);
5066 5000 match(eDIRegP);
5067 5001
5068 5002 format %{ %}
5069 5003 interface(REG_INTER);
5070 5004 %}
5071 5005
5072 5006 // On windows95, EBP is not safe to use for implicit null tests.
5073 5007 operand eRegP_no_EBP() %{
5074 5008 constraint(ALLOC_IN_RC(e_reg_no_rbp));
5075 5009 match(RegP);
5076 5010 match(eAXRegP);
5077 5011 match(eBXRegP);
5078 5012 match(eCXRegP);
5079 5013 match(eDIRegP);
5080 5014
5081 5015 op_cost(100);
5082 5016 format %{ %}
5083 5017 interface(REG_INTER);
5084 5018 %}
5085 5019
5086 5020 operand naxRegP() %{
5087 5021 constraint(ALLOC_IN_RC(nax_reg));
5088 5022 match(RegP);
5089 5023 match(eBXRegP);
5090 5024 match(eDXRegP);
5091 5025 match(eCXRegP);
5092 5026 match(eSIRegP);
5093 5027 match(eDIRegP);
5094 5028
5095 5029 format %{ %}
5096 5030 interface(REG_INTER);
5097 5031 %}
5098 5032
5099 5033 operand nabxRegP() %{
5100 5034 constraint(ALLOC_IN_RC(nabx_reg));
5101 5035 match(RegP);
5102 5036 match(eCXRegP);
5103 5037 match(eDXRegP);
5104 5038 match(eSIRegP);
5105 5039 match(eDIRegP);
5106 5040
5107 5041 format %{ %}
5108 5042 interface(REG_INTER);
5109 5043 %}
5110 5044
5111 5045 operand pRegP() %{
5112 5046 constraint(ALLOC_IN_RC(p_reg));
5113 5047 match(RegP);
5114 5048 match(eBXRegP);
5115 5049 match(eDXRegP);
5116 5050 match(eSIRegP);
5117 5051 match(eDIRegP);
5118 5052
5119 5053 format %{ %}
5120 5054 interface(REG_INTER);
5121 5055 %}
5122 5056
5123 5057 // Special Registers
5124 5058 // Return a pointer value
5125 5059 operand eAXRegP(eRegP reg) %{
5126 5060 constraint(ALLOC_IN_RC(eax_reg));
5127 5061 match(reg);
5128 5062 format %{ "EAX" %}
5129 5063 interface(REG_INTER);
5130 5064 %}
5131 5065
5132 5066 // Used in AtomicAdd
5133 5067 operand eBXRegP(eRegP reg) %{
5134 5068 constraint(ALLOC_IN_RC(ebx_reg));
5135 5069 match(reg);
5136 5070 format %{ "EBX" %}
5137 5071 interface(REG_INTER);
5138 5072 %}
5139 5073
5140 5074 // Tail-call (interprocedural jump) to interpreter
5141 5075 operand eCXRegP(eRegP reg) %{
5142 5076 constraint(ALLOC_IN_RC(ecx_reg));
5143 5077 match(reg);
5144 5078 format %{ "ECX" %}
5145 5079 interface(REG_INTER);
5146 5080 %}
5147 5081
5148 5082 operand eSIRegP(eRegP reg) %{
5149 5083 constraint(ALLOC_IN_RC(esi_reg));
5150 5084 match(reg);
5151 5085 format %{ "ESI" %}
5152 5086 interface(REG_INTER);
5153 5087 %}
5154 5088
5155 5089 // Used in rep stosw
5156 5090 operand eDIRegP(eRegP reg) %{
5157 5091 constraint(ALLOC_IN_RC(edi_reg));
5158 5092 match(reg);
5159 5093 format %{ "EDI" %}
5160 5094 interface(REG_INTER);
5161 5095 %}
5162 5096
5163 5097 operand eBPRegP() %{
5164 5098 constraint(ALLOC_IN_RC(ebp_reg));
5165 5099 match(RegP);
5166 5100 format %{ "EBP" %}
5167 5101 interface(REG_INTER);
5168 5102 %}
5169 5103
5170 5104 operand eRegL() %{
5171 5105 constraint(ALLOC_IN_RC(long_reg));
5172 5106 match(RegL);
5173 5107 match(eADXRegL);
5174 5108
5175 5109 format %{ %}
5176 5110 interface(REG_INTER);
5177 5111 %}
5178 5112
5179 5113 operand eADXRegL( eRegL reg ) %{
5180 5114 constraint(ALLOC_IN_RC(eadx_reg));
5181 5115 match(reg);
5182 5116
5183 5117 format %{ "EDX:EAX" %}
5184 5118 interface(REG_INTER);
5185 5119 %}
5186 5120
5187 5121 operand eBCXRegL( eRegL reg ) %{
5188 5122 constraint(ALLOC_IN_RC(ebcx_reg));
5189 5123 match(reg);
5190 5124
5191 5125 format %{ "EBX:ECX" %}
5192 5126 interface(REG_INTER);
5193 5127 %}
5194 5128
5195 5129 // Special case for integer high multiply
5196 5130 operand eADXRegL_low_only() %{
5197 5131 constraint(ALLOC_IN_RC(eadx_reg));
5198 5132 match(RegL);
5199 5133
5200 5134 format %{ "EAX" %}
5201 5135 interface(REG_INTER);
5202 5136 %}
5203 5137
5204 5138 // Flags register, used as output of compare instructions
5205 5139 operand eFlagsReg() %{
5206 5140 constraint(ALLOC_IN_RC(int_flags));
5207 5141 match(RegFlags);
5208 5142
5209 5143 format %{ "EFLAGS" %}
5210 5144 interface(REG_INTER);
5211 5145 %}
5212 5146
5213 5147 // Flags register, used as output of FLOATING POINT compare instructions
5214 5148 operand eFlagsRegU() %{
5215 5149 constraint(ALLOC_IN_RC(int_flags));
5216 5150 match(RegFlags);
5217 5151
5218 5152 format %{ "EFLAGS_U" %}
5219 5153 interface(REG_INTER);
5220 5154 %}
5221 5155
5222 5156 operand eFlagsRegUCF() %{
5223 5157 constraint(ALLOC_IN_RC(int_flags));
5224 5158 match(RegFlags);
5225 5159 predicate(false);
5226 5160
5227 5161 format %{ "EFLAGS_U_CF" %}
5228 5162 interface(REG_INTER);
5229 5163 %}
5230 5164
5231 5165 // Condition Code Register used by long compare
5232 5166 operand flagsReg_long_LTGE() %{
5233 5167 constraint(ALLOC_IN_RC(int_flags));
5234 5168 match(RegFlags);
5235 5169 format %{ "FLAGS_LTGE" %}
5236 5170 interface(REG_INTER);
5237 5171 %}
5238 5172 operand flagsReg_long_EQNE() %{
5239 5173 constraint(ALLOC_IN_RC(int_flags));
5240 5174 match(RegFlags);
5241 5175 format %{ "FLAGS_EQNE" %}
5242 5176 interface(REG_INTER);
5243 5177 %}
5244 5178 operand flagsReg_long_LEGT() %{
5245 5179 constraint(ALLOC_IN_RC(int_flags));
5246 5180 match(RegFlags);
5247 5181 format %{ "FLAGS_LEGT" %}
5248 5182 interface(REG_INTER);
5249 5183 %}
5250 5184
5251 5185 // Float register operands
5252 5186 operand regD() %{
5253 5187 predicate( UseSSE < 2 );
5254 5188 constraint(ALLOC_IN_RC(dbl_reg));
5255 5189 match(RegD);
5256 5190 match(regDPR1);
5257 5191 match(regDPR2);
5258 5192 format %{ %}
5259 5193 interface(REG_INTER);
5260 5194 %}
5261 5195
5262 5196 operand regDPR1(regD reg) %{
5263 5197 predicate( UseSSE < 2 );
5264 5198 constraint(ALLOC_IN_RC(dbl_reg0));
5265 5199 match(reg);
5266 5200 format %{ "FPR1" %}
5267 5201 interface(REG_INTER);
5268 5202 %}
5269 5203
5270 5204 operand regDPR2(regD reg) %{
5271 5205 predicate( UseSSE < 2 );
5272 5206 constraint(ALLOC_IN_RC(dbl_reg1));
5273 5207 match(reg);
5274 5208 format %{ "FPR2" %}
5275 5209 interface(REG_INTER);
5276 5210 %}
5277 5211
5278 5212 operand regnotDPR1(regD reg) %{
5279 5213 predicate( UseSSE < 2 );
5280 5214 constraint(ALLOC_IN_RC(dbl_notreg0));
5281 5215 match(reg);
5282 5216 format %{ %}
5283 5217 interface(REG_INTER);
5284 5218 %}
5285 5219
5286 5220 // XMM Double register operands
5287 5221 operand regXD() %{
5288 5222 predicate( UseSSE>=2 );
5289 5223 constraint(ALLOC_IN_RC(xdb_reg));
5290 5224 match(RegD);
5291 5225 match(regXD6);
5292 5226 match(regXD7);
5293 5227 format %{ %}
5294 5228 interface(REG_INTER);
5295 5229 %}
5296 5230
5297 5231 // XMM6 double register operands
5298 5232 operand regXD6(regXD reg) %{
5299 5233 predicate( UseSSE>=2 );
5300 5234 constraint(ALLOC_IN_RC(xdb_reg6));
5301 5235 match(reg);
5302 5236 format %{ "XMM6" %}
5303 5237 interface(REG_INTER);
5304 5238 %}
5305 5239
5306 5240 // XMM7 double register operands
5307 5241 operand regXD7(regXD reg) %{
5308 5242 predicate( UseSSE>=2 );
5309 5243 constraint(ALLOC_IN_RC(xdb_reg7));
5310 5244 match(reg);
5311 5245 format %{ "XMM7" %}
5312 5246 interface(REG_INTER);
5313 5247 %}
5314 5248
5315 5249 // Float register operands
5316 5250 operand regF() %{
5317 5251 predicate( UseSSE < 2 );
5318 5252 constraint(ALLOC_IN_RC(flt_reg));
5319 5253 match(RegF);
5320 5254 match(regFPR1);
5321 5255 format %{ %}
5322 5256 interface(REG_INTER);
5323 5257 %}
5324 5258
5325 5259 // Float register operands
5326 5260 operand regFPR1(regF reg) %{
5327 5261 predicate( UseSSE < 2 );
5328 5262 constraint(ALLOC_IN_RC(flt_reg0));
5329 5263 match(reg);
5330 5264 format %{ "FPR1" %}
5331 5265 interface(REG_INTER);
5332 5266 %}
5333 5267
5334 5268 // XMM register operands
5335 5269 operand regX() %{
5336 5270 predicate( UseSSE>=1 );
5337 5271 constraint(ALLOC_IN_RC(xmm_reg));
5338 5272 match(RegF);
5339 5273 format %{ %}
5340 5274 interface(REG_INTER);
5341 5275 %}
5342 5276
5343 5277
5344 5278 //----------Memory Operands----------------------------------------------------
5345 5279 // Direct Memory Operand
5346 5280 operand direct(immP addr) %{
5347 5281 match(addr);
5348 5282
5349 5283 format %{ "[$addr]" %}
5350 5284 interface(MEMORY_INTER) %{
5351 5285 base(0xFFFFFFFF);
5352 5286 index(0x4);
5353 5287 scale(0x0);
5354 5288 disp($addr);
5355 5289 %}
5356 5290 %}
5357 5291
5358 5292 // Indirect Memory Operand
5359 5293 operand indirect(eRegP reg) %{
5360 5294 constraint(ALLOC_IN_RC(e_reg));
5361 5295 match(reg);
5362 5296
5363 5297 format %{ "[$reg]" %}
5364 5298 interface(MEMORY_INTER) %{
5365 5299 base($reg);
5366 5300 index(0x4);
5367 5301 scale(0x0);
5368 5302 disp(0x0);
5369 5303 %}
5370 5304 %}
5371 5305
5372 5306 // Indirect Memory Plus Short Offset Operand
5373 5307 operand indOffset8(eRegP reg, immI8 off) %{
5374 5308 match(AddP reg off);
5375 5309
5376 5310 format %{ "[$reg + $off]" %}
5377 5311 interface(MEMORY_INTER) %{
5378 5312 base($reg);
5379 5313 index(0x4);
5380 5314 scale(0x0);
5381 5315 disp($off);
5382 5316 %}
5383 5317 %}
5384 5318
5385 5319 // Indirect Memory Plus Long Offset Operand
5386 5320 operand indOffset32(eRegP reg, immI off) %{
5387 5321 match(AddP reg off);
5388 5322
5389 5323 format %{ "[$reg + $off]" %}
5390 5324 interface(MEMORY_INTER) %{
5391 5325 base($reg);
5392 5326 index(0x4);
5393 5327 scale(0x0);
5394 5328 disp($off);
5395 5329 %}
5396 5330 %}
5397 5331
5398 5332 // Indirect Memory Plus Long Offset Operand
5399 5333 operand indOffset32X(eRegI reg, immP off) %{
5400 5334 match(AddP off reg);
5401 5335
5402 5336 format %{ "[$reg + $off]" %}
5403 5337 interface(MEMORY_INTER) %{
5404 5338 base($reg);
5405 5339 index(0x4);
5406 5340 scale(0x0);
5407 5341 disp($off);
5408 5342 %}
5409 5343 %}
5410 5344
5411 5345 // Indirect Memory Plus Index Register Plus Offset Operand
5412 5346 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5413 5347 match(AddP (AddP reg ireg) off);
5414 5348
5415 5349 op_cost(10);
5416 5350 format %{"[$reg + $off + $ireg]" %}
5417 5351 interface(MEMORY_INTER) %{
5418 5352 base($reg);
5419 5353 index($ireg);
5420 5354 scale(0x0);
5421 5355 disp($off);
5422 5356 %}
5423 5357 %}
5424 5358
5425 5359 // Indirect Memory Plus Index Register Plus Offset Operand
5426 5360 operand indIndex(eRegP reg, eRegI ireg) %{
5427 5361 match(AddP reg ireg);
5428 5362
5429 5363 op_cost(10);
5430 5364 format %{"[$reg + $ireg]" %}
5431 5365 interface(MEMORY_INTER) %{
5432 5366 base($reg);
5433 5367 index($ireg);
5434 5368 scale(0x0);
5435 5369 disp(0x0);
5436 5370 %}
5437 5371 %}
5438 5372
5439 5373 // // -------------------------------------------------------------------------
5440 5374 // // 486 architecture doesn't support "scale * index + offset" with out a base
5441 5375 // // -------------------------------------------------------------------------
5442 5376 // // Scaled Memory Operands
5443 5377 // // Indirect Memory Times Scale Plus Offset Operand
5444 5378 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5445 5379 // match(AddP off (LShiftI ireg scale));
5446 5380 //
5447 5381 // op_cost(10);
5448 5382 // format %{"[$off + $ireg << $scale]" %}
5449 5383 // interface(MEMORY_INTER) %{
5450 5384 // base(0x4);
5451 5385 // index($ireg);
5452 5386 // scale($scale);
5453 5387 // disp($off);
5454 5388 // %}
5455 5389 // %}
5456 5390
5457 5391 // Indirect Memory Times Scale Plus Index Register
5458 5392 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5459 5393 match(AddP reg (LShiftI ireg scale));
5460 5394
5461 5395 op_cost(10);
5462 5396 format %{"[$reg + $ireg << $scale]" %}
5463 5397 interface(MEMORY_INTER) %{
5464 5398 base($reg);
5465 5399 index($ireg);
5466 5400 scale($scale);
5467 5401 disp(0x0);
5468 5402 %}
5469 5403 %}
5470 5404
5471 5405 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5472 5406 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5473 5407 match(AddP (AddP reg (LShiftI ireg scale)) off);
5474 5408
5475 5409 op_cost(10);
5476 5410 format %{"[$reg + $off + $ireg << $scale]" %}
5477 5411 interface(MEMORY_INTER) %{
5478 5412 base($reg);
5479 5413 index($ireg);
5480 5414 scale($scale);
5481 5415 disp($off);
5482 5416 %}
5483 5417 %}
5484 5418
5485 5419 //----------Load Long Memory Operands------------------------------------------
5486 5420 // The load-long idiom will use its address expression again after loading
5487 5421 // the first word of the long. If the load-long destination overlaps with
5488 5422 // registers used in the addressing expression, the 2nd half will be loaded
5489 5423 // from a clobbered address. Fix this by requiring that load-long use
5490 5424 // address registers that do not overlap with the load-long target.
5491 5425
5492 5426 // load-long support
5493 5427 operand load_long_RegP() %{
5494 5428 constraint(ALLOC_IN_RC(esi_reg));
5495 5429 match(RegP);
5496 5430 match(eSIRegP);
5497 5431 op_cost(100);
5498 5432 format %{ %}
5499 5433 interface(REG_INTER);
5500 5434 %}
5501 5435
5502 5436 // Indirect Memory Operand Long
5503 5437 operand load_long_indirect(load_long_RegP reg) %{
5504 5438 constraint(ALLOC_IN_RC(esi_reg));
5505 5439 match(reg);
5506 5440
5507 5441 format %{ "[$reg]" %}
5508 5442 interface(MEMORY_INTER) %{
5509 5443 base($reg);
5510 5444 index(0x4);
5511 5445 scale(0x0);
5512 5446 disp(0x0);
5513 5447 %}
5514 5448 %}
5515 5449
5516 5450 // Indirect Memory Plus Long Offset Operand
5517 5451 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5518 5452 match(AddP reg off);
5519 5453
5520 5454 format %{ "[$reg + $off]" %}
5521 5455 interface(MEMORY_INTER) %{
5522 5456 base($reg);
5523 5457 index(0x4);
5524 5458 scale(0x0);
5525 5459 disp($off);
5526 5460 %}
5527 5461 %}
5528 5462
5529 5463 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5530 5464
5531 5465
5532 5466 //----------Special Memory Operands--------------------------------------------
5533 5467 // Stack Slot Operand - This operand is used for loading and storing temporary
5534 5468 // values on the stack where a match requires a value to
5535 5469 // flow through memory.
5536 5470 operand stackSlotP(sRegP reg) %{
5537 5471 constraint(ALLOC_IN_RC(stack_slots));
5538 5472 // No match rule because this operand is only generated in matching
5539 5473 format %{ "[$reg]" %}
5540 5474 interface(MEMORY_INTER) %{
5541 5475 base(0x4); // ESP
5542 5476 index(0x4); // No Index
5543 5477 scale(0x0); // No Scale
5544 5478 disp($reg); // Stack Offset
5545 5479 %}
5546 5480 %}
5547 5481
5548 5482 operand stackSlotI(sRegI reg) %{
5549 5483 constraint(ALLOC_IN_RC(stack_slots));
5550 5484 // No match rule because this operand is only generated in matching
5551 5485 format %{ "[$reg]" %}
5552 5486 interface(MEMORY_INTER) %{
5553 5487 base(0x4); // ESP
5554 5488 index(0x4); // No Index
5555 5489 scale(0x0); // No Scale
5556 5490 disp($reg); // Stack Offset
5557 5491 %}
5558 5492 %}
5559 5493
5560 5494 operand stackSlotF(sRegF reg) %{
5561 5495 constraint(ALLOC_IN_RC(stack_slots));
5562 5496 // No match rule because this operand is only generated in matching
5563 5497 format %{ "[$reg]" %}
5564 5498 interface(MEMORY_INTER) %{
5565 5499 base(0x4); // ESP
5566 5500 index(0x4); // No Index
5567 5501 scale(0x0); // No Scale
5568 5502 disp($reg); // Stack Offset
5569 5503 %}
5570 5504 %}
5571 5505
5572 5506 operand stackSlotD(sRegD reg) %{
5573 5507 constraint(ALLOC_IN_RC(stack_slots));
5574 5508 // No match rule because this operand is only generated in matching
5575 5509 format %{ "[$reg]" %}
5576 5510 interface(MEMORY_INTER) %{
5577 5511 base(0x4); // ESP
5578 5512 index(0x4); // No Index
5579 5513 scale(0x0); // No Scale
5580 5514 disp($reg); // Stack Offset
5581 5515 %}
5582 5516 %}
5583 5517
5584 5518 operand stackSlotL(sRegL reg) %{
5585 5519 constraint(ALLOC_IN_RC(stack_slots));
5586 5520 // No match rule because this operand is only generated in matching
5587 5521 format %{ "[$reg]" %}
5588 5522 interface(MEMORY_INTER) %{
5589 5523 base(0x4); // ESP
5590 5524 index(0x4); // No Index
5591 5525 scale(0x0); // No Scale
5592 5526 disp($reg); // Stack Offset
5593 5527 %}
5594 5528 %}
5595 5529
5596 5530 //----------Memory Operands - Win95 Implicit Null Variants----------------
5597 5531 // Indirect Memory Operand
5598 5532 operand indirect_win95_safe(eRegP_no_EBP reg)
5599 5533 %{
5600 5534 constraint(ALLOC_IN_RC(e_reg));
5601 5535 match(reg);
5602 5536
5603 5537 op_cost(100);
5604 5538 format %{ "[$reg]" %}
5605 5539 interface(MEMORY_INTER) %{
5606 5540 base($reg);
5607 5541 index(0x4);
5608 5542 scale(0x0);
5609 5543 disp(0x0);
5610 5544 %}
5611 5545 %}
5612 5546
5613 5547 // Indirect Memory Plus Short Offset Operand
5614 5548 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5615 5549 %{
5616 5550 match(AddP reg off);
5617 5551
5618 5552 op_cost(100);
5619 5553 format %{ "[$reg + $off]" %}
5620 5554 interface(MEMORY_INTER) %{
5621 5555 base($reg);
5622 5556 index(0x4);
5623 5557 scale(0x0);
5624 5558 disp($off);
5625 5559 %}
5626 5560 %}
5627 5561
5628 5562 // Indirect Memory Plus Long Offset Operand
5629 5563 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5630 5564 %{
5631 5565 match(AddP reg off);
5632 5566
5633 5567 op_cost(100);
5634 5568 format %{ "[$reg + $off]" %}
5635 5569 interface(MEMORY_INTER) %{
5636 5570 base($reg);
5637 5571 index(0x4);
5638 5572 scale(0x0);
5639 5573 disp($off);
5640 5574 %}
5641 5575 %}
5642 5576
5643 5577 // Indirect Memory Plus Index Register Plus Offset Operand
5644 5578 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5645 5579 %{
5646 5580 match(AddP (AddP reg ireg) off);
5647 5581
5648 5582 op_cost(100);
5649 5583 format %{"[$reg + $off + $ireg]" %}
5650 5584 interface(MEMORY_INTER) %{
5651 5585 base($reg);
5652 5586 index($ireg);
5653 5587 scale(0x0);
5654 5588 disp($off);
5655 5589 %}
5656 5590 %}
5657 5591
5658 5592 // Indirect Memory Times Scale Plus Index Register
5659 5593 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5660 5594 %{
5661 5595 match(AddP reg (LShiftI ireg scale));
5662 5596
5663 5597 op_cost(100);
5664 5598 format %{"[$reg + $ireg << $scale]" %}
5665 5599 interface(MEMORY_INTER) %{
5666 5600 base($reg);
5667 5601 index($ireg);
5668 5602 scale($scale);
5669 5603 disp(0x0);
5670 5604 %}
5671 5605 %}
5672 5606
5673 5607 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5674 5608 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5675 5609 %{
5676 5610 match(AddP (AddP reg (LShiftI ireg scale)) off);
5677 5611
5678 5612 op_cost(100);
5679 5613 format %{"[$reg + $off + $ireg << $scale]" %}
5680 5614 interface(MEMORY_INTER) %{
5681 5615 base($reg);
5682 5616 index($ireg);
5683 5617 scale($scale);
5684 5618 disp($off);
5685 5619 %}
5686 5620 %}
5687 5621
5688 5622 //----------Conditional Branch Operands----------------------------------------
5689 5623 // Comparison Op - This is the operation of the comparison, and is limited to
5690 5624 // the following set of codes:
5691 5625 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5692 5626 //
5693 5627 // Other attributes of the comparison, such as unsignedness, are specified
5694 5628 // by the comparison instruction that sets a condition code flags register.
5695 5629 // That result is represented by a flags operand whose subtype is appropriate
5696 5630 // to the unsignedness (etc.) of the comparison.
5697 5631 //
5698 5632 // Later, the instruction which matches both the Comparison Op (a Bool) and
5699 5633 // the flags (produced by the Cmp) specifies the coding of the comparison op
5700 5634 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5701 5635
5702 5636 // Comparison Code
5703 5637 operand cmpOp() %{
5704 5638 match(Bool);
5705 5639
5706 5640 format %{ "" %}
5707 5641 interface(COND_INTER) %{
5708 5642 equal(0x4, "e");
5709 5643 not_equal(0x5, "ne");
5710 5644 less(0xC, "l");
5711 5645 greater_equal(0xD, "ge");
5712 5646 less_equal(0xE, "le");
5713 5647 greater(0xF, "g");
5714 5648 %}
5715 5649 %}
5716 5650
5717 5651 // Comparison Code, unsigned compare. Used by FP also, with
5718 5652 // C2 (unordered) turned into GT or LT already. The other bits
5719 5653 // C0 and C3 are turned into Carry & Zero flags.
5720 5654 operand cmpOpU() %{
5721 5655 match(Bool);
5722 5656
5723 5657 format %{ "" %}
5724 5658 interface(COND_INTER) %{
5725 5659 equal(0x4, "e");
5726 5660 not_equal(0x5, "ne");
5727 5661 less(0x2, "b");
5728 5662 greater_equal(0x3, "nb");
5729 5663 less_equal(0x6, "be");
5730 5664 greater(0x7, "nbe");
5731 5665 %}
5732 5666 %}
5733 5667
5734 5668 // Floating comparisons that don't require any fixup for the unordered case
5735 5669 operand cmpOpUCF() %{
5736 5670 match(Bool);
5737 5671 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5738 5672 n->as_Bool()->_test._test == BoolTest::ge ||
5739 5673 n->as_Bool()->_test._test == BoolTest::le ||
5740 5674 n->as_Bool()->_test._test == BoolTest::gt);
5741 5675 format %{ "" %}
5742 5676 interface(COND_INTER) %{
5743 5677 equal(0x4, "e");
5744 5678 not_equal(0x5, "ne");
5745 5679 less(0x2, "b");
5746 5680 greater_equal(0x3, "nb");
5747 5681 less_equal(0x6, "be");
5748 5682 greater(0x7, "nbe");
5749 5683 %}
5750 5684 %}
5751 5685
5752 5686
5753 5687 // Floating comparisons that can be fixed up with extra conditional jumps
5754 5688 operand cmpOpUCF2() %{
5755 5689 match(Bool);
5756 5690 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5757 5691 n->as_Bool()->_test._test == BoolTest::eq);
5758 5692 format %{ "" %}
5759 5693 interface(COND_INTER) %{
5760 5694 equal(0x4, "e");
5761 5695 not_equal(0x5, "ne");
5762 5696 less(0x2, "b");
5763 5697 greater_equal(0x3, "nb");
5764 5698 less_equal(0x6, "be");
5765 5699 greater(0x7, "nbe");
5766 5700 %}
5767 5701 %}
5768 5702
5769 5703 // Comparison Code for FP conditional move
5770 5704 operand cmpOp_fcmov() %{
5771 5705 match(Bool);
5772 5706
5773 5707 format %{ "" %}
5774 5708 interface(COND_INTER) %{
5775 5709 equal (0x0C8);
5776 5710 not_equal (0x1C8);
5777 5711 less (0x0C0);
5778 5712 greater_equal(0x1C0);
5779 5713 less_equal (0x0D0);
5780 5714 greater (0x1D0);
5781 5715 %}
5782 5716 %}
5783 5717
5784 5718 // Comparison Code used in long compares
5785 5719 operand cmpOp_commute() %{
5786 5720 match(Bool);
5787 5721
5788 5722 format %{ "" %}
5789 5723 interface(COND_INTER) %{
5790 5724 equal(0x4, "e");
5791 5725 not_equal(0x5, "ne");
5792 5726 less(0xF, "g"); // relational codes are intentionally swapped (l<->g, le<->ge):
5793 5727 greater_equal(0xE, "le"); // this operand is used where the long-compare operands
5794 5728 less_equal(0xD, "ge"); // have been commuted, so each test maps to the x86
5795 5729 greater(0xC, "l"); // condition code of its mirror (compare with cmpOp above)
5796 5730 %}
5797 5731 %}
5798 5732
5799 5733 //----------OPERAND CLASSES----------------------------------------------------
5800 5734 // Operand Classes are groups of operands that are used to simplify
5801 5735 // instruction definitions by not requiring the AD writer to specify separate
5802 5736 // instructions for every form of operand when the instruction accepts
5803 5737 // multiple operand types with the same basic encoding and format. The classic
5804 5738 // case of this is memory operands.
5805 5739
5806 5740 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5807 5741 indIndex, indIndexScale, indIndexScaleOffset);
5808 5742
5809 5743 // Long memory operations are encoded in 2 instructions and a +4 offset.
5810 5744 // This means some kind of offset is always required and you cannot use
5811 5745 // an oop as the offset (done when working on static globals).
5812 5746 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5813 5747 indIndex, indIndexScale, indIndexScaleOffset);
5814 5748
5815 5749
5816 5750 //----------PIPELINE-----------------------------------------------------------
5817 5751 // Rules which define the behavior of the target architectures pipeline.
5818 5752 pipeline %{
5819 5753
5820 5754 //----------ATTRIBUTES---------------------------------------------------------
5821 5755 attributes %{
5822 5756 variable_size_instructions; // x86 instructions are variable-size (previous "Fixed size" comment was stale)
5823 5757 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5824 5758 instruction_unit_size = 1; // An instruction is 1 byte long
5825 5759 instruction_fetch_unit_size = 16; // The processor fetches one line
5826 5760 instruction_fetch_units = 1; // of 16 bytes
5827 5761
5828 5762 // List of nop instructions
5829 5763 nops( MachNop );
5830 5764 %}
5831 5765
5832 5766 //----------RESOURCES----------------------------------------------------------
5833 5767 // Resources are the functional units available to the machine
5834 5768
5835 5769 // Generic P2/P3 pipeline
5836 5770 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5837 5771 // 3 instructions decoded per cycle.
5838 5772 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5839 5773 // 2 ALU op, only ALU0 handles mul/div instructions.
5840 5774 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5841 5775 MS0, MS1, MEM = MS0 | MS1,
5842 5776 BR, FPU,
5843 5777 ALU0, ALU1, ALU = ALU0 | ALU1 );
5844 5778
5845 5779 //----------PIPELINE DESCRIPTION-----------------------------------------------
5846 5780 // Pipeline Description specifies the stages in the machine's pipeline
5847 5781
5848 5782 // Generic P2/P3 pipeline
5849 5783 pipe_desc(S0, S1, S2, S3, S4, S5);
5850 5784
5851 5785 //----------PIPELINE CLASSES---------------------------------------------------
5852 5786 // Pipeline Classes describe the stages in which input and output are
5853 5787 // referenced by the hardware pipeline.
5854 5788
5855 5789 // Naming convention: ialu or fpu
5856 5790 // Then: _reg
5857 5791 // Then: _reg if there is a 2nd register
5858 5792 // Then: _long if it's a pair of instructions implementing a long
5859 5793 // Then: _fat if it requires the big decoder
5860 5794 // Or: _mem if it requires the big decoder and a memory unit.
5861 5795
5862 5796 // Integer ALU reg operation
5863 5797 pipe_class ialu_reg(eRegI dst) %{
5864 5798 single_instruction;
5865 5799 dst : S4(write);
5866 5800 dst : S3(read);
5867 5801 DECODE : S0; // any decoder
5868 5802 ALU : S3; // any alu
5869 5803 %}
5870 5804
5871 5805 // Long ALU reg operation
5872 5806 pipe_class ialu_reg_long(eRegL dst) %{
5873 5807 instruction_count(2);
5874 5808 dst : S4(write);
5875 5809 dst : S3(read);
5876 5810 DECODE : S0(2); // any 2 decoders
5877 5811 ALU : S3(2); // both alus
5878 5812 %}
5879 5813
5880 5814 // Integer ALU reg operation using big decoder
5881 5815 pipe_class ialu_reg_fat(eRegI dst) %{
5882 5816 single_instruction;
5883 5817 dst : S4(write);
5884 5818 dst : S3(read);
5885 5819 D0 : S0; // big decoder only
5886 5820 ALU : S3; // any alu
5887 5821 %}
5888 5822
5889 5823 // Long ALU reg operation using big decoder
5890 5824 pipe_class ialu_reg_long_fat(eRegL dst) %{
5891 5825 instruction_count(2);
5892 5826 dst : S4(write);
5893 5827 dst : S3(read);
5894 5828 D0 : S0(2); // big decoder only; twice
5895 5829 ALU : S3(2); // any 2 alus
5896 5830 %}
5897 5831
5898 5832 // Integer ALU reg-reg operation
5899 5833 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5900 5834 single_instruction;
5901 5835 dst : S4(write);
5902 5836 src : S3(read);
5903 5837 DECODE : S0; // any decoder
5904 5838 ALU : S3; // any alu
5905 5839 %}
5906 5840
5907 5841 // Long ALU reg-reg operation
5908 5842 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5909 5843 instruction_count(2);
5910 5844 dst : S4(write);
5911 5845 src : S3(read);
5912 5846 DECODE : S0(2); // any 2 decoders
5913 5847 ALU : S3(2); // both alus
5914 5848 %}
5915 5849
5916 5850 // Integer ALU reg-reg operation
5917 5851 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5918 5852 single_instruction;
5919 5853 dst : S4(write);
5920 5854 src : S3(read);
5921 5855 D0 : S0; // big decoder only
5922 5856 ALU : S3; // any alu
5923 5857 %}
5924 5858
5925 5859 // Long ALU reg-reg operation
5926 5860 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5927 5861 instruction_count(2);
5928 5862 dst : S4(write);
5929 5863 src : S3(read);
5930 5864 D0 : S0(2); // big decoder only; twice
5931 5865 ALU : S3(2); // both alus
5932 5866 %}
5933 5867
5934 5868 // Integer ALU reg-mem operation
5935 5869 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5936 5870 single_instruction;
5937 5871 dst : S5(write);
5938 5872 mem : S3(read);
5939 5873 D0 : S0; // big decoder only
5940 5874 ALU : S4; // any alu
5941 5875 MEM : S3; // any mem
5942 5876 %}
5943 5877
5944 5878 // Long ALU reg-mem operation
5945 5879 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5946 5880 instruction_count(2);
5947 5881 dst : S5(write);
5948 5882 mem : S3(read);
5949 5883 D0 : S0(2); // big decoder only; twice
5950 5884 ALU : S4(2); // any 2 alus
5951 5885 MEM : S3(2); // both mems
5952 5886 %}
5953 5887
5954 5888 // Integer mem operation (prefetch)
5955 5889 pipe_class ialu_mem(memory mem)
5956 5890 %{
5957 5891 single_instruction;
5958 5892 mem : S3(read);
5959 5893 D0 : S0; // big decoder only
5960 5894 MEM : S3; // any mem
5961 5895 %}
5962 5896
5963 5897 // Integer Store to Memory
5964 5898 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5965 5899 single_instruction;
5966 5900 mem : S3(read);
5967 5901 src : S5(read);
5968 5902 D0 : S0; // big decoder only
5969 5903 ALU : S4; // any alu
5970 5904 MEM : S3;
5971 5905 %}
5972 5906
5973 5907 // Long Store to Memory
5974 5908 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5975 5909 instruction_count(2);
5976 5910 mem : S3(read);
5977 5911 src : S5(read);
5978 5912 D0 : S0(2); // big decoder only; twice
5979 5913 ALU : S4(2); // any 2 alus
5980 5914 MEM : S3(2); // Both mems
5981 5915 %}
5982 5916
5983 5917 // Integer Store to Memory
5984 5918 pipe_class ialu_mem_imm(memory mem) %{
5985 5919 single_instruction;
5986 5920 mem : S3(read);
5987 5921 D0 : S0; // big decoder only
5988 5922 ALU : S4; // any alu
5989 5923 MEM : S3;
5990 5924 %}
5991 5925
5992 5926 // Integer ALU0 reg-reg operation
5993 5927 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5994 5928 single_instruction;
5995 5929 dst : S4(write);
5996 5930 src : S3(read);
5997 5931 D0 : S0; // Big decoder only
5998 5932 ALU0 : S3; // only alu0
5999 5933 %}
6000 5934
6001 5935 // Integer ALU0 reg-mem operation
6002 5936 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
6003 5937 single_instruction;
6004 5938 dst : S5(write);
6005 5939 mem : S3(read);
6006 5940 D0 : S0; // big decoder only
6007 5941 ALU0 : S4; // ALU0 only
6008 5942 MEM : S3; // any mem
6009 5943 %}
6010 5944
6011 5945 // Integer ALU reg-reg operation
6012 5946 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
6013 5947 single_instruction;
6014 5948 cr : S4(write);
6015 5949 src1 : S3(read);
6016 5950 src2 : S3(read);
6017 5951 DECODE : S0; // any decoder
6018 5952 ALU : S3; // any alu
6019 5953 %}
6020 5954
6021 5955 // Integer ALU reg-imm operation
6022 5956 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
6023 5957 single_instruction;
6024 5958 cr : S4(write);
6025 5959 src1 : S3(read);
6026 5960 DECODE : S0; // any decoder
6027 5961 ALU : S3; // any alu
6028 5962 %}
6029 5963
6030 5964 // Integer ALU reg-mem operation
6031 5965 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
6032 5966 single_instruction;
6033 5967 cr : S4(write);
6034 5968 src1 : S3(read);
6035 5969 src2 : S3(read);
6036 5970 D0 : S0; // big decoder only
6037 5971 ALU : S4; // any alu
6038 5972 MEM : S3;
6039 5973 %}
6040 5974
6041 5975 // Conditional move reg-reg
6042 5976 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
6043 5977 instruction_count(4);
6044 5978 y : S4(read);
6045 5979 q : S3(read);
6046 5980 p : S3(read);
6047 5981 DECODE : S0(4); // any decoder
6048 5982 %}
6049 5983
6050 5984 // Conditional move reg-reg
6051 5985 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
6052 5986 single_instruction;
6053 5987 dst : S4(write);
6054 5988 src : S3(read);
6055 5989 cr : S3(read);
6056 5990 DECODE : S0; // any decoder
6057 5991 %}
6058 5992
6059 5993 // Conditional move reg-mem
6060 5994 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
6061 5995 single_instruction;
6062 5996 dst : S4(write);
6063 5997 src : S3(read);
6064 5998 cr : S3(read);
6065 5999 DECODE : S0; // any decoder
6066 6000 MEM : S3;
6067 6001 %}
6068 6002
6069 6003 // Conditional move reg-reg long
6070 6004 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6071 6005 single_instruction;
6072 6006 dst : S4(write);
6073 6007 src : S3(read);
6074 6008 cr : S3(read);
6075 6009 DECODE : S0(2); // any 2 decoders
6076 6010 %}
6077 6011
6078 6012 // Conditional move double reg-reg
6079 6013 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6080 6014 single_instruction;
6081 6015 dst : S4(write);
6082 6016 src : S3(read);
6083 6017 cr : S3(read);
6084 6018 DECODE : S0; // any decoder
6085 6019 %}
6086 6020
6087 6021 // Float reg-reg operation
6088 6022 pipe_class fpu_reg(regD dst) %{
6089 6023 instruction_count(2);
6090 6024 dst : S3(read);
6091 6025 DECODE : S0(2); // any 2 decoders
6092 6026 FPU : S3;
6093 6027 %}
6094 6028
6095 6029 // Float reg-reg operation
6096 6030 pipe_class fpu_reg_reg(regD dst, regD src) %{
6097 6031 instruction_count(2);
6098 6032 dst : S4(write);
6099 6033 src : S3(read);
6100 6034 DECODE : S0(2); // any 2 decoders
6101 6035 FPU : S3;
6102 6036 %}
6103 6037
6104 6038 // Float reg-reg operation
6105 6039 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6106 6040 instruction_count(3);
6107 6041 dst : S4(write);
6108 6042 src1 : S3(read);
6109 6043 src2 : S3(read);
6110 6044 DECODE : S0(3); // any 3 decoders
6111 6045 FPU : S3(2);
6112 6046 %}
6113 6047
6114 6048 // Float reg-reg operation
6115 6049 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6116 6050 instruction_count(4);
6117 6051 dst : S4(write);
6118 6052 src1 : S3(read);
6119 6053 src2 : S3(read);
6120 6054 src3 : S3(read);
6121 6055 DECODE : S0(4); // any 4 decoders (matches instruction_count; comment previously said 3)
6122 6056 FPU : S3(2);
6123 6057 %}
6124 6058
6125 6059 // Float reg-reg operation
6126 6060 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6127 6061 instruction_count(4);
6128 6062 dst : S4(write);
6129 6063 src1 : S3(read);
6130 6064 src2 : S3(read);
6131 6065 src3 : S3(read);
6132 6066 DECODE : S1(3); // any 3 decoders
6133 6067 D0 : S0; // Big decoder only
6134 6068 FPU : S3(2);
6135 6069 MEM : S3;
6136 6070 %}
6137 6071
6138 6072 // Float reg-mem operation
6139 6073 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6140 6074 instruction_count(2);
6141 6075 dst : S5(write);
6142 6076 mem : S3(read);
6143 6077 D0 : S0; // big decoder only
6144 6078 DECODE : S1; // any decoder for FPU POP
6145 6079 FPU : S4;
6146 6080 MEM : S3; // any mem
6147 6081 %}
6148 6082
6149 6083 // Float reg-mem operation
6150 6084 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6151 6085 instruction_count(3);
6152 6086 dst : S5(write);
6153 6087 src1 : S3(read);
6154 6088 mem : S3(read);
6155 6089 D0 : S0; // big decoder only
6156 6090 DECODE : S1(2); // any decoder for FPU POP
6157 6091 FPU : S4;
6158 6092 MEM : S3; // any mem
6159 6093 %}
6160 6094
6161 6095 // Float mem-reg operation
6162 6096 pipe_class fpu_mem_reg(memory mem, regD src) %{
6163 6097 instruction_count(2);
6164 6098 src : S5(read);
6165 6099 mem : S3(read);
6166 6100 DECODE : S0; // any decoder for FPU PUSH
6167 6101 D0 : S1; // big decoder only
6168 6102 FPU : S4;
6169 6103 MEM : S3; // any mem
6170 6104 %}
6171 6105
6172 6106 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6173 6107 instruction_count(3);
6174 6108 src1 : S3(read);
6175 6109 src2 : S3(read);
6176 6110 mem : S3(read);
6177 6111 DECODE : S0(2); // any decoder for FPU PUSH
6178 6112 D0 : S1; // big decoder only
6179 6113 FPU : S4;
6180 6114 MEM : S3; // any mem
6181 6115 %}
6182 6116
6183 6117 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6184 6118 instruction_count(3);
6185 6119 src1 : S3(read);
6186 6120 src2 : S3(read);
6187 6121 mem : S4(read);
6188 6122 DECODE : S0; // any decoder for FPU PUSH
6189 6123 D0 : S0(2); // big decoder only
6190 6124 FPU : S4;
6191 6125 MEM : S3(2); // any mem
6192 6126 %}
6193 6127
6194 6128 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6195 6129 instruction_count(2);
6196 6130 src1 : S3(read);
6197 6131 dst : S4(read);
6198 6132 D0 : S0(2); // big decoder only
6199 6133 MEM : S3(2); // any mem
6200 6134 %}
6201 6135
6202 6136 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6203 6137 instruction_count(3);
6204 6138 src1 : S3(read);
6205 6139 src2 : S3(read);
6206 6140 dst : S4(read);
6207 6141 D0 : S0(3); // big decoder only
6208 6142 FPU : S4;
6209 6143 MEM : S3(3); // any mem
6210 6144 %}
6211 6145
6212 6146 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6213 6147 instruction_count(3);
6214 6148 src1 : S4(read);
6215 6149 mem : S4(read);
6216 6150 DECODE : S0; // any decoder for FPU PUSH
6217 6151 D0 : S0(2); // big decoder only
6218 6152 FPU : S4;
6219 6153 MEM : S3(2); // any mem
6220 6154 %}
6221 6155
6222 6156 // Float load constant
6223 6157 pipe_class fpu_reg_con(regD dst) %{
6224 6158 instruction_count(2);
6225 6159 dst : S5(write);
6226 6160 D0 : S0; // big decoder only for the load
6227 6161 DECODE : S1; // any decoder for FPU POP
6228 6162 FPU : S4;
6229 6163 MEM : S3; // any mem
6230 6164 %}
6231 6165
6232 6166 // Float load constant
6233 6167 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6234 6168 instruction_count(3);
6235 6169 dst : S5(write);
6236 6170 src : S3(read);
6237 6171 D0 : S0; // big decoder only for the load
6238 6172 DECODE : S1(2); // any decoder for FPU POP
6239 6173 FPU : S4;
6240 6174 MEM : S3; // any mem
6241 6175 %}
6242 6176
6243 6177 // UnConditional branch
6244 6178 pipe_class pipe_jmp( label labl ) %{
6245 6179 single_instruction;
6246 6180 BR : S3;
6247 6181 %}
6248 6182
6249 6183 // Conditional branch
6250 6184 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6251 6185 single_instruction;
6252 6186 cr : S1(read);
6253 6187 BR : S3;
6254 6188 %}
6255 6189
6256 6190 // Allocation idiom
6257 6191 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6258 6192 instruction_count(1); force_serialization;
6259 6193 fixed_latency(6);
6260 6194 heap_ptr : S3(read);
6261 6195 DECODE : S0(3);
6262 6196 D0 : S2;
6263 6197 MEM : S3;
6264 6198 ALU : S3(2);
6265 6199 dst : S5(write);
6266 6200 BR : S5;
6267 6201 %}
6268 6202
6269 6203 // Generic big/slow expanded idiom
6270 6204 pipe_class pipe_slow( ) %{
6271 6205 instruction_count(10); multiple_bundles; force_serialization;
6272 6206 fixed_latency(100);
6273 6207 D0 : S0(2);
6274 6208 MEM : S3(2);
6275 6209 %}
6276 6210
6277 6211 // The real do-nothing guy
6278 6212 pipe_class empty( ) %{
6279 6213 instruction_count(0);
6280 6214 %}
6281 6215
6282 6216 // Define the class for the Nop node
6283 6217 define %{
6284 6218 MachNop = empty;
6285 6219 %}
6286 6220
6287 6221 %}
6288 6222
6289 6223 //----------INSTRUCTIONS-------------------------------------------------------
6290 6224 //
6291 6225 // match -- States which machine-independent subtree may be replaced
6292 6226 // by this instruction.
6293 6227 // ins_cost -- The estimated cost of this instruction is used by instruction
6294 6228 // selection to identify a minimum cost tree of machine
6295 6229 // instructions that matches a tree of machine-independent
6296 6230 // instructions.
6297 6231 // format -- A string providing the disassembly for this instruction.
6298 6232 // The value of an instruction's operand may be inserted
6299 6233 // by referring to it with a '$' prefix.
6300 6234 // opcode -- Three instruction opcodes may be provided. These are referred
6301 6235 // to within an encode class as $primary, $secondary, and $tertiary
6302 6236 // respectively. The primary opcode is commonly used to
6303 6237 // indicate the type of machine instruction, while secondary
6304 6238 // and tertiary are often used for prefix options or addressing
6305 6239 // modes.
6306 6240 // ins_encode -- A list of encode classes with parameters. The encode class
6307 6241 // name must have been defined in an 'enc_class' specification
6308 6242 // in the encode section of the architecture description.
6309 6243
6310 6244 //----------BSWAP-Instruction--------------------------------------------------
6311 6245 instruct bytes_reverse_int(eRegI dst) %{
6312 6246 match(Set dst (ReverseBytesI dst));
6313 6247
6314 6248 format %{ "BSWAP $dst" %}
6315 6249 opcode(0x0F, 0xC8);
6316 6250 ins_encode( OpcP, OpcSReg(dst) );
6317 6251 ins_pipe( ialu_reg );
6318 6252 %}
6319 6253
6320 6254 instruct bytes_reverse_long(eRegL dst) %{
6321 6255 match(Set dst (ReverseBytesL dst));
6322 6256
6323 6257 format %{ "BSWAP $dst.lo\n\t"
6324 6258 "BSWAP $dst.hi\n\t"
6325 6259 "XCHG $dst.lo $dst.hi" %}
6326 6260
6327 6261 ins_cost(125);
6328 6262 ins_encode( bswap_long_bytes(dst) );
6329 6263 ins_pipe( ialu_reg_reg);
6330 6264 %}
6331 6265
6332 6266 instruct bytes_reverse_unsigned_short(eRegI dst) %{
6333 6267 match(Set dst (ReverseBytesUS dst));
6334 6268
6335 6269 format %{ "BSWAP $dst\n\t"
6336 6270 "SHR $dst,16\n\t" %}
6337 6271 ins_encode %{
6338 6272 __ bswapl($dst$$Register);
6339 6273 __ shrl($dst$$Register, 16);
6340 6274 %}
6341 6275 ins_pipe( ialu_reg );
6342 6276 %}
6343 6277
6344 6278 instruct bytes_reverse_short(eRegI dst) %{
6345 6279 match(Set dst (ReverseBytesS dst));
6346 6280
6347 6281 format %{ "BSWAP $dst\n\t"
6348 6282 "SAR $dst,16\n\t" %}
6349 6283 ins_encode %{
6350 6284 __ bswapl($dst$$Register);
6351 6285 __ sarl($dst$$Register, 16);
6352 6286 %}
6353 6287 ins_pipe( ialu_reg );
6354 6288 %}
6355 6289
6356 6290
6357 6291 //---------- Zeros Count Instructions ------------------------------------------
6358 6292
6359 6293 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6360 6294 predicate(UseCountLeadingZerosInstruction);
6361 6295 match(Set dst (CountLeadingZerosI src));
6362 6296 effect(KILL cr);
6363 6297
6364 6298 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6365 6299 ins_encode %{
6366 6300 __ lzcntl($dst$$Register, $src$$Register);
6367 6301 %}
6368 6302 ins_pipe(ialu_reg);
6369 6303 %}
6370 6304
6371 6305 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6372 6306 predicate(!UseCountLeadingZerosInstruction);
6373 6307 match(Set dst (CountLeadingZerosI src));
6374 6308 effect(KILL cr);
6375 6309
6376 6310 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6377 6311 "JNZ skip\n\t"
6378 6312 "MOV $dst, -1\n"
6379 6313 "skip:\n\t"
6380 6314 "NEG $dst\n\t"
6381 6315 "ADD $dst, 31" %}
6382 6316 ins_encode %{
6383 6317 Register Rdst = $dst$$Register;
6384 6318 Register Rsrc = $src$$Register;
6385 6319 Label skip;
6386 6320 __ bsrl(Rdst, Rsrc);
6387 6321 __ jccb(Assembler::notZero, skip);
6388 6322 __ movl(Rdst, -1);
6389 6323 __ bind(skip);
6390 6324 __ negl(Rdst);
6391 6325 __ addl(Rdst, BitsPerInt - 1);
6392 6326 %}
6393 6327 ins_pipe(ialu_reg);
6394 6328 %}
6395 6329
6396 6330 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6397 6331 predicate(UseCountLeadingZerosInstruction);
6398 6332 match(Set dst (CountLeadingZerosL src));
6399 6333 effect(TEMP dst, KILL cr);
6400 6334
6401 6335 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6402 6336 "JNC done\n\t"
6403 6337 "LZCNT $dst, $src.lo\n\t"
6404 6338 "ADD $dst, 32\n"
6405 6339 "done:" %}
6406 6340 ins_encode %{
6407 6341 Register Rdst = $dst$$Register;
6408 6342 Register Rsrc = $src$$Register;
6409 6343 Label done;
6410 6344 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6411 6345 __ jccb(Assembler::carryClear, done);
6412 6346 __ lzcntl(Rdst, Rsrc);
6413 6347 __ addl(Rdst, BitsPerInt);
6414 6348 __ bind(done);
6415 6349 %}
6416 6350 ins_pipe(ialu_reg);
6417 6351 %}
6418 6352
6419 6353 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6420 6354 predicate(!UseCountLeadingZerosInstruction);
6421 6355 match(Set dst (CountLeadingZerosL src));
6422 6356 effect(TEMP dst, KILL cr);
6423 6357
6424 6358 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6425 6359 "JZ msw_is_zero\n\t"
6426 6360 "ADD $dst, 32\n\t"
6427 6361 "JMP not_zero\n"
6428 6362 "msw_is_zero:\n\t"
6429 6363 "BSR $dst, $src.lo\n\t"
6430 6364 "JNZ not_zero\n\t"
6431 6365 "MOV $dst, -1\n"
6432 6366 "not_zero:\n\t"
6433 6367 "NEG $dst\n\t"
6434 6368 "ADD $dst, 63\n" %}
6435 6369 ins_encode %{
6436 6370 Register Rdst = $dst$$Register;
6437 6371 Register Rsrc = $src$$Register;
6438 6372 Label msw_is_zero;
6439 6373 Label not_zero;
6440 6374 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6441 6375 __ jccb(Assembler::zero, msw_is_zero);
6442 6376 __ addl(Rdst, BitsPerInt);
6443 6377 __ jmpb(not_zero);
6444 6378 __ bind(msw_is_zero);
6445 6379 __ bsrl(Rdst, Rsrc);
6446 6380 __ jccb(Assembler::notZero, not_zero);
6447 6381 __ movl(Rdst, -1);
6448 6382 __ bind(not_zero);
6449 6383 __ negl(Rdst);
6450 6384 __ addl(Rdst, BitsPerLong - 1);
6451 6385 %}
6452 6386 ins_pipe(ialu_reg);
6453 6387 %}
6454 6388
6455 6389 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6456 6390 match(Set dst (CountTrailingZerosI src));
6457 6391 effect(KILL cr);
6458 6392
6459 6393 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6460 6394 "JNZ done\n\t"
6461 6395 "MOV $dst, 32\n"
6462 6396 "done:" %}
6463 6397 ins_encode %{
6464 6398 Register Rdst = $dst$$Register;
6465 6399 Label done;
6466 6400 __ bsfl(Rdst, $src$$Register);
6467 6401 __ jccb(Assembler::notZero, done);
6468 6402 __ movl(Rdst, BitsPerInt);
6469 6403 __ bind(done);
6470 6404 %}
6471 6405 ins_pipe(ialu_reg);
6472 6406 %}
6473 6407
6474 6408 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6475 6409 match(Set dst (CountTrailingZerosL src));
6476 6410 effect(TEMP dst, KILL cr);
6477 6411
6478 6412 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6479 6413 "JNZ done\n\t"
6480 6414 "BSF $dst, $src.hi\n\t"
6481 6415 "JNZ msw_not_zero\n\t"
6482 6416 "MOV $dst, 32\n"
6483 6417 "msw_not_zero:\n\t"
6484 6418 "ADD $dst, 32\n"
6485 6419 "done:" %}
6486 6420 ins_encode %{
6487 6421 Register Rdst = $dst$$Register;
6488 6422 Register Rsrc = $src$$Register;
6489 6423 Label msw_not_zero;
6490 6424 Label done;
6491 6425 __ bsfl(Rdst, Rsrc);
6492 6426 __ jccb(Assembler::notZero, done);
6493 6427 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6494 6428 __ jccb(Assembler::notZero, msw_not_zero);
6495 6429 __ movl(Rdst, BitsPerInt);
6496 6430 __ bind(msw_not_zero);
6497 6431 __ addl(Rdst, BitsPerInt);
6498 6432 __ bind(done);
6499 6433 %}
6500 6434 ins_pipe(ialu_reg);
6501 6435 %}
6502 6436
6503 6437
6504 6438 //---------- Population Count Instructions -------------------------------------
6505 6439
6506 6440 instruct popCountI(eRegI dst, eRegI src) %{
6507 6441 predicate(UsePopCountInstruction);
6508 6442 match(Set dst (PopCountI src));
6509 6443
6510 6444 format %{ "POPCNT $dst, $src" %}
6511 6445 ins_encode %{
6512 6446 __ popcntl($dst$$Register, $src$$Register);
6513 6447 %}
6514 6448 ins_pipe(ialu_reg);
6515 6449 %}
6516 6450
6517 6451 instruct popCountI_mem(eRegI dst, memory mem) %{
6518 6452 predicate(UsePopCountInstruction);
6519 6453 match(Set dst (PopCountI (LoadI mem)));
6520 6454
6521 6455 format %{ "POPCNT $dst, $mem" %}
6522 6456 ins_encode %{
6523 6457 __ popcntl($dst$$Register, $mem$$Address);
6524 6458 %}
6525 6459 ins_pipe(ialu_reg);
6526 6460 %}
6527 6461
6528 6462 // Note: Long.bitCount(long) returns an int.
6529 6463 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6530 6464 predicate(UsePopCountInstruction);
6531 6465 match(Set dst (PopCountL src));
6532 6466 effect(KILL cr, TEMP tmp, TEMP dst);
6533 6467
6534 6468 format %{ "POPCNT $dst, $src.lo\n\t"
6535 6469 "POPCNT $tmp, $src.hi\n\t"
6536 6470 "ADD $dst, $tmp" %}
6537 6471 ins_encode %{
6538 6472 __ popcntl($dst$$Register, $src$$Register);
6539 6473 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6540 6474 __ addl($dst$$Register, $tmp$$Register);
6541 6475 %}
6542 6476 ins_pipe(ialu_reg);
6543 6477 %}
6544 6478
6545 6479 // Note: Long.bitCount(long) returns an int.
6546 6480 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6547 6481 predicate(UsePopCountInstruction);
6548 6482 match(Set dst (PopCountL (LoadL mem)));
6549 6483 effect(KILL cr, TEMP tmp, TEMP dst);
6550 6484
6551 6485 format %{ "POPCNT $dst, $mem\n\t"
6552 6486 "POPCNT $tmp, $mem+4\n\t"
6553 6487 "ADD $dst, $tmp" %}
6554 6488 ins_encode %{
6555 6489 //__ popcntl($dst$$Register, $mem$$Address$$first);
6556 6490 //__ popcntl($tmp$$Register, $mem$$Address$$second);
6557 6491 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6558 6492 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6559 6493 __ addl($dst$$Register, $tmp$$Register);
6560 6494 %}
6561 6495 ins_pipe(ialu_reg);
6562 6496 %}
6563 6497
6564 6498
6565 6499 //----------Load/Store/Move Instructions---------------------------------------
6566 6500 //----------Load Instructions--------------------------------------------------
6567 6501 // Load Byte (8bit signed)
6568 6502 instruct loadB(xRegI dst, memory mem) %{
6569 6503 match(Set dst (LoadB mem));
6570 6504
6571 6505 ins_cost(125);
6572 6506 format %{ "MOVSX8 $dst,$mem\t# byte" %}
6573 6507
6574 6508 ins_encode %{
6575 6509 __ movsbl($dst$$Register, $mem$$Address);
6576 6510 %}
6577 6511
6578 6512 ins_pipe(ialu_reg_mem);
6579 6513 %}
6580 6514
6581 6515 // Load Byte (8bit signed) into Long Register
6582 6516 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6583 6517 match(Set dst (ConvI2L (LoadB mem)));
6584 6518 effect(KILL cr);
6585 6519
6586 6520 ins_cost(375);
6587 6521 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6588 6522 "MOV $dst.hi,$dst.lo\n\t"
6589 6523 "SAR $dst.hi,7" %}
6590 6524
6591 6525 ins_encode %{
6592 6526 __ movsbl($dst$$Register, $mem$$Address);
6593 6527 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6594 6528 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6595 6529 %}
6596 6530
6597 6531 ins_pipe(ialu_reg_mem);
6598 6532 %}
6599 6533
6600 6534 // Load Unsigned Byte (8bit UNsigned)
6601 6535 instruct loadUB(xRegI dst, memory mem) %{
6602 6536 match(Set dst (LoadUB mem));
6603 6537
6604 6538 ins_cost(125);
6605 6539 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6606 6540
6607 6541 ins_encode %{
6608 6542 __ movzbl($dst$$Register, $mem$$Address);
6609 6543 %}
6610 6544
6611 6545 ins_pipe(ialu_reg_mem);
6612 6546 %}
6613 6547
6614 6548 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6615 6549 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6616 6550 match(Set dst (ConvI2L (LoadUB mem)));
6617 6551 effect(KILL cr);
6618 6552
6619 6553 ins_cost(250);
6620 6554 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6621 6555 "XOR $dst.hi,$dst.hi" %}
6622 6556
6623 6557 ins_encode %{
6624 6558 Register Rdst = $dst$$Register;
6625 6559 __ movzbl(Rdst, $mem$$Address);
6626 6560 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6627 6561 %}
6628 6562
6629 6563 ins_pipe(ialu_reg_mem);
6630 6564 %}
6631 6565
6632 6566 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
6633 6567 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6634 6568 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6635 6569 effect(KILL cr);
6636 6570
6637 6571 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6638 6572 "XOR $dst.hi,$dst.hi\n\t"
6639 6573 "AND $dst.lo,$mask" %}
6640 6574 ins_encode %{
6641 6575 Register Rdst = $dst$$Register;
6642 6576 __ movzbl(Rdst, $mem$$Address);
6643 6577 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6644 6578 __ andl(Rdst, $mask$$constant);
6645 6579 %}
6646 6580 ins_pipe(ialu_reg_mem);
6647 6581 %}
6648 6582
6649 6583 // Load Short (16bit signed)
6650 6584 instruct loadS(eRegI dst, memory mem) %{
6651 6585 match(Set dst (LoadS mem));
6652 6586
6653 6587 ins_cost(125);
6654 6588 format %{ "MOVSX $dst,$mem\t# short" %}
6655 6589
6656 6590 ins_encode %{
6657 6591 __ movswl($dst$$Register, $mem$$Address);
6658 6592 %}
6659 6593
6660 6594 ins_pipe(ialu_reg_mem);
6661 6595 %}
6662 6596
6663 6597 // Load Short (16 bit signed) to Byte (8 bit signed)
6664 6598 instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6665 6599 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6666 6600
6667 6601 ins_cost(125);
6668 6602 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
6669 6603 ins_encode %{
6670 6604 __ movsbl($dst$$Register, $mem$$Address);
6671 6605 %}
6672 6606 ins_pipe(ialu_reg_mem);
6673 6607 %}
6674 6608
6675 6609 // Load Short (16bit signed) into Long Register
6676 6610 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6677 6611 match(Set dst (ConvI2L (LoadS mem)));
6678 6612 effect(KILL cr);
6679 6613
6680 6614 ins_cost(375);
6681 6615 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
6682 6616 "MOV $dst.hi,$dst.lo\n\t"
6683 6617 "SAR $dst.hi,15" %}
6684 6618
6685 6619 ins_encode %{
6686 6620 __ movswl($dst$$Register, $mem$$Address);
6687 6621 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6688 6622 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
6689 6623 %}
6690 6624
6691 6625 ins_pipe(ialu_reg_mem);
6692 6626 %}
6693 6627
6694 6628 // Load Unsigned Short/Char (16bit unsigned)
6695 6629 instruct loadUS(eRegI dst, memory mem) %{
6696 6630 match(Set dst (LoadUS mem));
6697 6631
6698 6632 ins_cost(125);
6699 6633 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
6700 6634
6701 6635 ins_encode %{
6702 6636 __ movzwl($dst$$Register, $mem$$Address);
6703 6637 %}
6704 6638
6705 6639 ins_pipe(ialu_reg_mem);
6706 6640 %}
6707 6641
6708 6642 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6709 6643 instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6710 6644 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6711 6645
6712 6646 ins_cost(125);
6713 6647 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
6714 6648 ins_encode %{
6715 6649 __ movsbl($dst$$Register, $mem$$Address);
6716 6650 %}
6717 6651 ins_pipe(ialu_reg_mem);
6718 6652 %}
6719 6653
6720 6654 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6721 6655 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6722 6656 match(Set dst (ConvI2L (LoadUS mem)));
6723 6657 effect(KILL cr);
6724 6658
6725 6659 ins_cost(250);
6726 6660 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
6727 6661 "XOR $dst.hi,$dst.hi" %}
6728 6662
6729 6663 ins_encode %{
6730 6664 __ movzwl($dst$$Register, $mem$$Address);
6731 6665 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6732 6666 %}
6733 6667
6734 6668 ins_pipe(ialu_reg_mem);
6735 6669 %}
6736 6670
6737 6671 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6738 6672 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6739 6673 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6740 6674 effect(KILL cr);
6741 6675
6742 6676 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6743 6677 "XOR $dst.hi,$dst.hi" %}
6744 6678 ins_encode %{
6745 6679 Register Rdst = $dst$$Register;
6746 6680 __ movzbl(Rdst, $mem$$Address);
6747 6681 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6748 6682 %}
6749 6683 ins_pipe(ialu_reg_mem);
6750 6684 %}
6751 6685
6752 6686 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
6753 6687 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6754 6688 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6755 6689 effect(KILL cr);
6756 6690
6757 6691 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6758 6692 "XOR $dst.hi,$dst.hi\n\t"
6759 6693 "AND $dst.lo,$mask" %}
6760 6694 ins_encode %{
6761 6695 Register Rdst = $dst$$Register;
6762 6696 __ movzwl(Rdst, $mem$$Address);
6763 6697 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6764 6698 __ andl(Rdst, $mask$$constant);
6765 6699 %}
6766 6700 ins_pipe(ialu_reg_mem);
6767 6701 %}
6768 6702
6769 6703 // Load Integer
6770 6704 instruct loadI(eRegI dst, memory mem) %{
6771 6705 match(Set dst (LoadI mem));
6772 6706
6773 6707 ins_cost(125);
6774 6708 format %{ "MOV $dst,$mem\t# int" %}
6775 6709
6776 6710 ins_encode %{
6777 6711 __ movl($dst$$Register, $mem$$Address);
6778 6712 %}
6779 6713
6780 6714 ins_pipe(ialu_reg_mem);
6781 6715 %}
6782 6716
6783 6717 // Load Integer (32 bit signed) to Byte (8 bit signed)
6784 6718 instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6785 6719 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6786 6720
6787 6721 ins_cost(125);
6788 6722 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
6789 6723 ins_encode %{
6790 6724 __ movsbl($dst$$Register, $mem$$Address);
6791 6725 %}
6792 6726 ins_pipe(ialu_reg_mem);
6793 6727 %}
6794 6728
6795 6729 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6796 6730 instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6797 6731 match(Set dst (AndI (LoadI mem) mask));
6798 6732
6799 6733 ins_cost(125);
6800 6734 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
6801 6735 ins_encode %{
6802 6736 __ movzbl($dst$$Register, $mem$$Address);
6803 6737 %}
6804 6738 ins_pipe(ialu_reg_mem);
6805 6739 %}
6806 6740
6807 6741 // Load Integer (32 bit signed) to Short (16 bit signed)
6808 6742 instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6809 6743 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6810 6744
6811 6745 ins_cost(125);
6812 6746 format %{ "MOVSX $dst, $mem\t# int -> short" %}
6813 6747 ins_encode %{
6814 6748 __ movswl($dst$$Register, $mem$$Address);
6815 6749 %}
6816 6750 ins_pipe(ialu_reg_mem);
6817 6751 %}
6818 6752
6819 6753 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6820 6754 instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6821 6755 match(Set dst (AndI (LoadI mem) mask));
6822 6756
6823 6757 ins_cost(125);
6824 6758 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
6825 6759 ins_encode %{
6826 6760 __ movzwl($dst$$Register, $mem$$Address);
6827 6761 %}
6828 6762 ins_pipe(ialu_reg_mem);
6829 6763 %}
6830 6764
6831 6765 // Load Integer into Long Register
6832 6766 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6833 6767 match(Set dst (ConvI2L (LoadI mem)));
6834 6768 effect(KILL cr);
6835 6769
6836 6770 ins_cost(375);
6837 6771 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
6838 6772 "MOV $dst.hi,$dst.lo\n\t"
6839 6773 "SAR $dst.hi,31" %}
6840 6774
6841 6775 ins_encode %{
6842 6776 __ movl($dst$$Register, $mem$$Address);
6843 6777 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6844 6778 __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
6845 6779 %}
6846 6780
6847 6781 ins_pipe(ialu_reg_mem);
6848 6782 %}
6849 6783
6850 6784 // Load Integer with mask 0xFF into Long Register
6851 6785 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6852 6786 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6853 6787 effect(KILL cr);
6854 6788
6855 6789 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6856 6790 "XOR $dst.hi,$dst.hi" %}
6857 6791 ins_encode %{
6858 6792 Register Rdst = $dst$$Register;
6859 6793 __ movzbl(Rdst, $mem$$Address);
6860 6794 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6861 6795 %}
6862 6796 ins_pipe(ialu_reg_mem);
6863 6797 %}
6864 6798
6865 6799 // Load Integer with mask 0xFFFF into Long Register
6866 6800 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6867 6801 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6868 6802 effect(KILL cr);
6869 6803
6870 6804 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6871 6805 "XOR $dst.hi,$dst.hi" %}
6872 6806 ins_encode %{
6873 6807 Register Rdst = $dst$$Register;
6874 6808 __ movzwl(Rdst, $mem$$Address);
6875 6809 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6876 6810 %}
6877 6811 ins_pipe(ialu_reg_mem);
6878 6812 %}
6879 6813
6880 6814 // Load Integer with 32-bit mask into Long Register
6881 6815 instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6882 6816 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6883 6817 effect(KILL cr);
6884 6818
6885 6819 format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6886 6820 "XOR $dst.hi,$dst.hi\n\t"
6887 6821 "AND $dst.lo,$mask" %}
6888 6822 ins_encode %{
6889 6823 Register Rdst = $dst$$Register;
6890 6824 __ movl(Rdst, $mem$$Address);
6891 6825 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6892 6826 __ andl(Rdst, $mask$$constant);
6893 6827 %}
6894 6828 ins_pipe(ialu_reg_mem);
6895 6829 %}
6896 6830
6897 6831 // Load Unsigned Integer into Long Register
6898 6832 instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6899 6833 match(Set dst (LoadUI2L mem));
6900 6834 effect(KILL cr);
6901 6835
6902 6836 ins_cost(250);
6903 6837 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
6904 6838 "XOR $dst.hi,$dst.hi" %}
6905 6839
6906 6840 ins_encode %{
6907 6841 __ movl($dst$$Register, $mem$$Address);
6908 6842 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6909 6843 %}
6910 6844
6911 6845 ins_pipe(ialu_reg_mem);
6912 6846 %}
6913 6847
6914 6848 // Load Long. Cannot clobber address while loading, so restrict address
6915 6849 // register to ESI
6916 6850 instruct loadL(eRegL dst, load_long_memory mem) %{
6917 6851 predicate(!((LoadLNode*)n)->require_atomic_access());
6918 6852 match(Set dst (LoadL mem));
6919 6853
6920 6854 ins_cost(250);
6921 6855 format %{ "MOV $dst.lo,$mem\t# long\n\t"
6922 6856 "MOV $dst.hi,$mem+4" %}
6923 6857
6924 6858 ins_encode %{
6925 6859 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
6926 6860 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
6927 6861 __ movl($dst$$Register, Amemlo);
6928 6862 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6929 6863 %}
6930 6864
6931 6865 ins_pipe(ialu_reg_long_mem);
6932 6866 %}
6933 6867
6934 6868 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6935 6869 // then store it down to the stack and reload on the int
6936 6870 // side.
6937 6871 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6938 6872 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6939 6873 match(Set dst (LoadL mem));
6940 6874
6941 6875 ins_cost(200);
6942 6876 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6943 6877 "FISTp $dst" %}
6944 6878 ins_encode(enc_loadL_volatile(mem,dst));
6945 6879 ins_pipe( fpu_reg_mem );
6946 6880 %}
6947 6881
6948 6882 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6949 6883 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6950 6884 match(Set dst (LoadL mem));
6951 6885 effect(TEMP tmp);
6952 6886 ins_cost(180);
6953 6887 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6954 6888 "MOVSD $dst,$tmp" %}
6955 6889 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
6956 6890 ins_pipe( pipe_slow );
6957 6891 %}
6958 6892
6959 6893 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6960 6894 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6961 6895 match(Set dst (LoadL mem));
6962 6896 effect(TEMP tmp);
6963 6897 ins_cost(160);
6964 6898 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6965 6899 "MOVD $dst.lo,$tmp\n\t"
6966 6900 "PSRLQ $tmp,32\n\t"
6967 6901 "MOVD $dst.hi,$tmp" %}
6968 6902 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
6969 6903 ins_pipe( pipe_slow );
6970 6904 %}
6971 6905
6972 6906 // Load Range
6973 6907 instruct loadRange(eRegI dst, memory mem) %{
6974 6908 match(Set dst (LoadRange mem));
6975 6909
6976 6910 ins_cost(125);
6977 6911 format %{ "MOV $dst,$mem" %}
6978 6912 opcode(0x8B);
6979 6913 ins_encode( OpcP, RegMem(dst,mem));
6980 6914 ins_pipe( ialu_reg_mem );
6981 6915 %}
6982 6916
6983 6917
6984 6918 // Load Pointer
6985 6919 instruct loadP(eRegP dst, memory mem) %{
6986 6920 match(Set dst (LoadP mem));
6987 6921
6988 6922 ins_cost(125);
6989 6923 format %{ "MOV $dst,$mem" %}
6990 6924 opcode(0x8B);
6991 6925 ins_encode( OpcP, RegMem(dst,mem));
6992 6926 ins_pipe( ialu_reg_mem );
6993 6927 %}
6994 6928
6995 6929 // Load Klass Pointer
6996 6930 instruct loadKlass(eRegP dst, memory mem) %{
6997 6931 match(Set dst (LoadKlass mem));
6998 6932
6999 6933 ins_cost(125);
7000 6934 format %{ "MOV $dst,$mem" %}
7001 6935 opcode(0x8B);
7002 6936 ins_encode( OpcP, RegMem(dst,mem));
7003 6937 ins_pipe( ialu_reg_mem );
7004 6938 %}
7005 6939
7006 6940 // Load Double
7007 6941 instruct loadD(regD dst, memory mem) %{
7008 6942 predicate(UseSSE<=1);
7009 6943 match(Set dst (LoadD mem));
7010 6944
7011 6945 ins_cost(150);
7012 6946 format %{ "FLD_D ST,$mem\n\t"
7013 6947 "FSTP $dst" %}
7014 6948 opcode(0xDD); /* DD /0 */
7015 6949 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7016 6950 Pop_Reg_D(dst) );
7017 6951 ins_pipe( fpu_reg_mem );
7018 6952 %}
7019 6953
7020 6954 // Load Double to XMM
7021 6955 instruct loadXD(regXD dst, memory mem) %{
7022 6956 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
7023 6957 match(Set dst (LoadD mem));
7024 6958 ins_cost(145);
7025 6959 format %{ "MOVSD $dst,$mem" %}
7026 6960 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7027 6961 ins_pipe( pipe_slow );
7028 6962 %}
7029 6963
7030 6964 instruct loadXD_partial(regXD dst, memory mem) %{
7031 6965 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
7032 6966 match(Set dst (LoadD mem));
7033 6967 ins_cost(145);
7034 6968 format %{ "MOVLPD $dst,$mem" %}
7035 6969 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
7036 6970 ins_pipe( pipe_slow );
7037 6971 %}
7038 6972
7039 6973 // Load to XMM register (single-precision floating point)
7040 6974 // MOVSS instruction
7041 6975 instruct loadX(regX dst, memory mem) %{
7042 6976 predicate(UseSSE>=1);
7043 6977 match(Set dst (LoadF mem));
7044 6978 ins_cost(145);
7045 6979 format %{ "MOVSS $dst,$mem" %}
7046 6980 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7047 6981 ins_pipe( pipe_slow );
7048 6982 %}
7049 6983
7050 6984 // Load Float
7051 6985 instruct loadF(regF dst, memory mem) %{
7052 6986 predicate(UseSSE==0);
7053 6987 match(Set dst (LoadF mem));
7054 6988
7055 6989 ins_cost(150);
7056 6990 format %{ "FLD_S ST,$mem\n\t"
7057 6991 "FSTP $dst" %}
7058 6992 opcode(0xD9); /* D9 /0 */
7059 6993 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7060 6994 Pop_Reg_F(dst) );
7061 6995 ins_pipe( fpu_reg_mem );
7062 6996 %}
7063 6997
7064 6998 // Load Aligned Packed Byte to XMM register
7065 6999 instruct loadA8B(regXD dst, memory mem) %{
7066 7000 predicate(UseSSE>=1);
7067 7001 match(Set dst (Load8B mem));
7068 7002 ins_cost(125);
7069 7003 format %{ "MOVQ $dst,$mem\t! packed8B" %}
7070 7004 ins_encode( movq_ld(dst, mem));
7071 7005 ins_pipe( pipe_slow );
7072 7006 %}
7073 7007
7074 7008 // Load Aligned Packed Short to XMM register
7075 7009 instruct loadA4S(regXD dst, memory mem) %{
7076 7010 predicate(UseSSE>=1);
7077 7011 match(Set dst (Load4S mem));
7078 7012 ins_cost(125);
7079 7013 format %{ "MOVQ $dst,$mem\t! packed4S" %}
7080 7014 ins_encode( movq_ld(dst, mem));
7081 7015 ins_pipe( pipe_slow );
7082 7016 %}
7083 7017
7084 7018 // Load Aligned Packed Char to XMM register
7085 7019 instruct loadA4C(regXD dst, memory mem) %{
7086 7020 predicate(UseSSE>=1);
7087 7021 match(Set dst (Load4C mem));
7088 7022 ins_cost(125);
7089 7023 format %{ "MOVQ $dst,$mem\t! packed4C" %}
7090 7024 ins_encode( movq_ld(dst, mem));
7091 7025 ins_pipe( pipe_slow );
7092 7026 %}
7093 7027
7094 7028 // Load Aligned Packed Integer to XMM register
7095 7029 instruct load2IU(regXD dst, memory mem) %{
7096 7030 predicate(UseSSE>=1);
7097 7031 match(Set dst (Load2I mem));
7098 7032 ins_cost(125);
7099 7033 format %{ "MOVQ $dst,$mem\t! packed2I" %}
7100 7034 ins_encode( movq_ld(dst, mem));
7101 7035 ins_pipe( pipe_slow );
7102 7036 %}
7103 7037
7104 7038 // Load Aligned Packed Single to XMM
7105 7039 instruct loadA2F(regXD dst, memory mem) %{
7106 7040 predicate(UseSSE>=1);
7107 7041 match(Set dst (Load2F mem));
7108 7042 ins_cost(145);
7109 7043 format %{ "MOVQ $dst,$mem\t! packed2F" %}
7110 7044 ins_encode( movq_ld(dst, mem));
7111 7045 ins_pipe( pipe_slow );
7112 7046 %}
7113 7047
7114 7048 // Load Effective Address
7115 7049 instruct leaP8(eRegP dst, indOffset8 mem) %{
7116 7050 match(Set dst mem);
7117 7051
7118 7052 ins_cost(110);
7119 7053 format %{ "LEA $dst,$mem" %}
7120 7054 opcode(0x8D);
7121 7055 ins_encode( OpcP, RegMem(dst,mem));
7122 7056 ins_pipe( ialu_reg_reg_fat );
7123 7057 %}
7124 7058
7125 7059 instruct leaP32(eRegP dst, indOffset32 mem) %{
7126 7060 match(Set dst mem);
7127 7061
7128 7062 ins_cost(110);
7129 7063 format %{ "LEA $dst,$mem" %}
7130 7064 opcode(0x8D);
7131 7065 ins_encode( OpcP, RegMem(dst,mem));
7132 7066 ins_pipe( ialu_reg_reg_fat );
7133 7067 %}
7134 7068
7135 7069 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
7136 7070 match(Set dst mem);
7137 7071
7138 7072 ins_cost(110);
7139 7073 format %{ "LEA $dst,$mem" %}
7140 7074 opcode(0x8D);
7141 7075 ins_encode( OpcP, RegMem(dst,mem));
7142 7076 ins_pipe( ialu_reg_reg_fat );
7143 7077 %}
7144 7078
7145 7079 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
7146 7080 match(Set dst mem);
7147 7081
7148 7082 ins_cost(110);
7149 7083 format %{ "LEA $dst,$mem" %}
7150 7084 opcode(0x8D);
7151 7085 ins_encode( OpcP, RegMem(dst,mem));
7152 7086 ins_pipe( ialu_reg_reg_fat );
7153 7087 %}
7154 7088
7155 7089 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
7156 7090 match(Set dst mem);
7157 7091
7158 7092 ins_cost(110);
7159 7093 format %{ "LEA $dst,$mem" %}
7160 7094 opcode(0x8D);
7161 7095 ins_encode( OpcP, RegMem(dst,mem));
7162 7096 ins_pipe( ialu_reg_reg_fat );
7163 7097 %}
7164 7098
7165 7099 // Load Constant
7166 7100 instruct loadConI(eRegI dst, immI src) %{
7167 7101 match(Set dst src);
7168 7102
7169 7103 format %{ "MOV $dst,$src" %}
7170 7104 ins_encode( LdImmI(dst, src) );
7171 7105 ins_pipe( ialu_reg_fat );
7172 7106 %}
7173 7107
7174 7108 // Load Constant zero
7175 7109 instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
7176 7110 match(Set dst src);
7177 7111 effect(KILL cr);
7178 7112
7179 7113 ins_cost(50);
7180 7114 format %{ "XOR $dst,$dst" %}
7181 7115 opcode(0x33); /* + rd */
7182 7116 ins_encode( OpcP, RegReg( dst, dst ) );
7183 7117 ins_pipe( ialu_reg );
7184 7118 %}
7185 7119
7186 7120 instruct loadConP(eRegP dst, immP src) %{
7187 7121 match(Set dst src);
7188 7122
7189 7123 format %{ "MOV $dst,$src" %}
7190 7124 opcode(0xB8); /* + rd */
7191 7125 ins_encode( LdImmP(dst, src) );
7192 7126 ins_pipe( ialu_reg_fat );
7193 7127 %}
7194 7128
7195 7129 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
7196 7130 match(Set dst src);
7197 7131 effect(KILL cr);
7198 7132 ins_cost(200);
7199 7133 format %{ "MOV $dst.lo,$src.lo\n\t"
7200 7134 "MOV $dst.hi,$src.hi" %}
7201 7135 opcode(0xB8);
7202 7136 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
7203 7137 ins_pipe( ialu_reg_long_fat );
7204 7138 %}
7205 7139
7206 7140 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
7207 7141 match(Set dst src);
↓ open down ↓ |
2350 lines elided |
↑ open up ↑ |
7208 7142 effect(KILL cr);
7209 7143 ins_cost(150);
7210 7144 format %{ "XOR $dst.lo,$dst.lo\n\t"
7211 7145 "XOR $dst.hi,$dst.hi" %}
7212 7146 opcode(0x33,0x33);
7213 7147 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7214 7148 ins_pipe( ialu_reg_long );
7215 7149 %}
7216 7150
7217 7151 // The instruction usage is guarded by predicate in operand immF().
7218 -instruct loadConF(regF dst, immF src) %{
7219 - match(Set dst src);
7152 +instruct loadConF(regF dst, immF con) %{
7153 + match(Set dst con);
7220 7154 ins_cost(125);
7155 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
7156 + "FSTP $dst" %}
7157 + ins_encode %{
7158 + __ fld_s($constantaddress($con));
7159 + __ fstp_d($dst$$reg);
7160 + %}
7161 + ins_pipe(fpu_reg_con);
7162 +%}
7221 7163
7222 - format %{ "FLD_S ST,$src\n\t"
7164 +// The instruction usage is guarded by predicate in operand immF0().
7165 +instruct loadConF0(regF dst, immF0 con) %{
7166 + match(Set dst con);
7167 + ins_cost(125);
7168 + format %{ "FLDZ ST\n\t"
7223 7169 "FSTP $dst" %}
7224 - opcode(0xD9, 0x00); /* D9 /0 */
7225 - ins_encode(LdImmF(src), Pop_Reg_F(dst) );
7226 - ins_pipe( fpu_reg_con );
7170 + ins_encode %{
7171 + __ fldz();
7172 + __ fstp_d($dst$$reg);
7173 + %}
7174 + ins_pipe(fpu_reg_con);
7175 +%}
7176 +
7177 +// The instruction usage is guarded by predicate in operand immF1().
7178 +instruct loadConF1(regF dst, immF1 con) %{
7179 + match(Set dst con);
7180 + ins_cost(125);
7181 + format %{ "FLD1 ST\n\t"
7182 + "FSTP $dst" %}
7183 + ins_encode %{
7184 + __ fld1();
7185 + __ fstp_d($dst$$reg);
7186 + %}
7187 + ins_pipe(fpu_reg_con);
7227 7188 %}
7228 7189
7229 7190 // The instruction usage is guarded by predicate in operand immXF().
7230 7191 instruct loadConX(regX dst, immXF con) %{
7231 7192 match(Set dst con);
7232 7193 ins_cost(125);
7233 - format %{ "MOVSS $dst,[$con]" %}
7234 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
7235 - ins_pipe( pipe_slow );
7194 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7195 + ins_encode %{
7196 + __ movflt($dst$$XMMRegister, $constantaddress($con));
7197 + %}
7198 + ins_pipe(pipe_slow);
7236 7199 %}
7237 7200
7238 7201 // The instruction usage is guarded by predicate in operand immXF0().
7239 7202 instruct loadConX0(regX dst, immXF0 src) %{
7240 7203 match(Set dst src);
7241 7204 ins_cost(100);
7242 7205 format %{ "XORPS $dst,$dst\t# float 0.0" %}
7243 - ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7244 - ins_pipe( pipe_slow );
7206 + ins_encode %{
7207 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7208 + %}
7209 + ins_pipe(pipe_slow);
7245 7210 %}
7246 7211
7247 7212 // The instruction usage is guarded by predicate in operand immD().
7248 -instruct loadConD(regD dst, immD src) %{
7249 - match(Set dst src);
7213 +instruct loadConD(regD dst, immD con) %{
7214 + match(Set dst con);
7215 + ins_cost(125);
7216 +
7217 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
7218 + "FSTP $dst" %}
7219 + ins_encode %{
7220 + __ fld_d($constantaddress($con));
7221 + __ fstp_d($dst$$reg);
7222 + %}
7223 + ins_pipe(fpu_reg_con);
7224 +%}
7225 +
7226 +// The instruction usage is guarded by predicate in operand immD0().
7227 +instruct loadConD0(regD dst, immD0 con) %{
7228 + match(Set dst con);
7229 + ins_cost(125);
7230 +
7231 + format %{ "FLDZ ST\n\t"
7232 + "FSTP $dst" %}
7233 + ins_encode %{
7234 + __ fldz();
7235 + __ fstp_d($dst$$reg);
7236 + %}
7237 + ins_pipe(fpu_reg_con);
7238 +%}
7239 +
7240 +// The instruction usage is guarded by predicate in operand immD1().
7241 +instruct loadConD1(regD dst, immD1 con) %{
7242 + match(Set dst con);
7250 7243 ins_cost(125);
7251 7244
7252 - format %{ "FLD_D ST,$src\n\t"
7245 + format %{ "FLD1 ST\n\t"
7253 7246 "FSTP $dst" %}
7254 - ins_encode(LdImmD(src), Pop_Reg_D(dst) );
7255 - ins_pipe( fpu_reg_con );
7247 + ins_encode %{
7248 + __ fld1();
7249 + __ fstp_d($dst$$reg);
7250 + %}
7251 + ins_pipe(fpu_reg_con);
7256 7252 %}
7257 7253
7258 7254 // The instruction usage is guarded by predicate in operand immXD().
7259 7255 instruct loadConXD(regXD dst, immXD con) %{
7260 7256 match(Set dst con);
7261 7257 ins_cost(125);
7262 - format %{ "MOVSD $dst,[$con]" %}
7263 - ins_encode(load_conXD(dst, con));
7264 - ins_pipe( pipe_slow );
// NOTE(review): SSE2 path now loads the double from the nmethod constant
// table via $constantaddress (movdbl == MOVSD reg, mem) instead of the old
// load_conXD helper encoding.
7258 + format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
7259 + ins_encode %{
7260 + __ movdbl($dst$$XMMRegister, $constantaddress($con));
7261 + %}
7262 + ins_pipe(pipe_slow);
7265 7263 %}
7266 7264
7267 7265 // The instruction usage is guarded by predicate in operand immXD0().
7268 7266 instruct loadConXD0(regXD dst, immXD0 src) %{
7269 7267 match(Set dst src);
7270 7268 ins_cost(100);
7271 7269 format %{ "XORPD $dst,$dst\t# double 0.0" %}
7272 7270 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7273 7271 ins_pipe( pipe_slow );
7274 7272 %}
7275 7273
7276 7274 // Load Stack Slot
7277 7275 instruct loadSSI(eRegI dst, stackSlotI src) %{
7278 7276 match(Set dst src);
7279 7277 ins_cost(125);
7280 7278
7281 7279 format %{ "MOV $dst,$src" %}
7282 7280 opcode(0x8B);
7283 7281 ins_encode( OpcP, RegMem(dst,src));
7284 7282 ins_pipe( ialu_reg_mem );
7285 7283 %}
7286 7284
7287 7285 instruct loadSSL(eRegL dst, stackSlotL src) %{
7288 7286 match(Set dst src);
7289 7287
7290 7288 ins_cost(200);
7291 7289 format %{ "MOV $dst,$src.lo\n\t"
7292 7290 "MOV $dst+4,$src.hi" %}
7293 7291 opcode(0x8B, 0x8B);
7294 7292 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
7295 7293 ins_pipe( ialu_mem_long_reg );
7296 7294 %}
7297 7295
7298 7296 // Load Stack Slot
7299 7297 instruct loadSSP(eRegP dst, stackSlotP src) %{
7300 7298 match(Set dst src);
7301 7299 ins_cost(125);
7302 7300
7303 7301 format %{ "MOV $dst,$src" %}
7304 7302 opcode(0x8B);
7305 7303 ins_encode( OpcP, RegMem(dst,src));
7306 7304 ins_pipe( ialu_reg_mem );
7307 7305 %}
7308 7306
7309 7307 // Load Stack Slot
7310 7308 instruct loadSSF(regF dst, stackSlotF src) %{
7311 7309 match(Set dst src);
7312 7310 ins_cost(125);
7313 7311
7314 7312 format %{ "FLD_S $src\n\t"
7315 7313 "FSTP $dst" %}
7316 7314 opcode(0xD9); /* D9 /0, FLD m32real */
7317 7315 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7318 7316 Pop_Reg_F(dst) );
7319 7317 ins_pipe( fpu_reg_mem );
7320 7318 %}
7321 7319
7322 7320 // Load Stack Slot
7323 7321 instruct loadSSD(regD dst, stackSlotD src) %{
7324 7322 match(Set dst src);
7325 7323 ins_cost(125);
7326 7324
7327 7325 format %{ "FLD_D $src\n\t"
7328 7326 "FSTP $dst" %}
7329 7327 opcode(0xDD); /* DD /0, FLD m64real */
7330 7328 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7331 7329 Pop_Reg_D(dst) );
7332 7330 ins_pipe( fpu_reg_mem );
7333 7331 %}
7334 7332
7335 7333 // Prefetch instructions.
7336 7334 // Must be safe to execute with invalid address (cannot fault).
7337 7335
7338 7336 instruct prefetchr0( memory mem ) %{
7339 7337 predicate(UseSSE==0 && !VM_Version::supports_3dnow());
7340 7338 match(PrefetchRead mem);
7341 7339 ins_cost(0);
7342 7340 size(0);
7343 7341 format %{ "PREFETCHR (non-SSE is empty encoding)" %}
7344 7342 ins_encode();
7345 7343 ins_pipe(empty);
7346 7344 %}
7347 7345
7348 7346 instruct prefetchr( memory mem ) %{
// NOTE(review): predicate relies on && binding tighter than ||, i.e.
// (UseSSE==0 && supports_3dnow()) || ReadPrefetchInstr==3 — presumably
// intentional (3DNow! PREFETCH, or explicitly selected via flag), but
// parentheses would make it unambiguous. Unchanged context in this rev.
7349 7347 predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
7350 7348 match(PrefetchRead mem);
7351 7349 ins_cost(100);
7352 7350
7353 7351 format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
7354 7352 opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */
7355 7353 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7356 7354 ins_pipe(ialu_mem);
7357 7355 %}
7358 7356
7359 7357 instruct prefetchrNTA( memory mem ) %{
7360 7358 predicate(UseSSE>=1 && ReadPrefetchInstr==0);
7361 7359 match(PrefetchRead mem);
7362 7360 ins_cost(100);
7363 7361
7364 7362 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
7365 7363 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
7366 7364 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7367 7365 ins_pipe(ialu_mem);
7368 7366 %}
7369 7367
7370 7368 instruct prefetchrT0( memory mem ) %{
7371 7369 predicate(UseSSE>=1 && ReadPrefetchInstr==1);
7372 7370 match(PrefetchRead mem);
7373 7371 ins_cost(100);
7374 7372
7375 7373 format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
7376 7374 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7377 7375 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7378 7376 ins_pipe(ialu_mem);
7379 7377 %}
7380 7378
7381 7379 instruct prefetchrT2( memory mem ) %{
7382 7380 predicate(UseSSE>=1 && ReadPrefetchInstr==2);
7383 7381 match(PrefetchRead mem);
7384 7382 ins_cost(100);
7385 7383
7386 7384 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
7387 7385 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7388 7386 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7389 7387 ins_pipe(ialu_mem);
7390 7388 %}
7391 7389
7392 7390 instruct prefetchw0( memory mem ) %{
7393 7391 predicate(UseSSE==0 && !VM_Version::supports_3dnow());
7394 7392 match(PrefetchWrite mem);
7395 7393 ins_cost(0);
7396 7394 size(0);
7397 7395 format %{ "Prefetch (non-SSE is empty encoding)" %}
7398 7396 ins_encode();
7399 7397 ins_pipe(empty);
7400 7398 %}
7401 7399
7402 7400 instruct prefetchw( memory mem ) %{
// NOTE(review): same implicit-precedence pattern as prefetchr —
// (UseSSE==0 && supports_3dnow()) || AllocatePrefetchInstr==3; presumably
// intentional, but worth parenthesizing. Unchanged context in this rev.
7403 7401 predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
7404 7402 match( PrefetchWrite mem );
7405 7403 ins_cost(100);
7406 7404
7407 7405 format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
7408 7406 opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
7409 7407 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7410 7408 ins_pipe(ialu_mem);
7411 7409 %}
7412 7410
7413 7411 instruct prefetchwNTA( memory mem ) %{
7414 7412 predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
7415 7413 match(PrefetchWrite mem);
7416 7414 ins_cost(100);
7417 7415
7418 7416 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
7419 7417 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
7420 7418 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7421 7419 ins_pipe(ialu_mem);
7422 7420 %}
7423 7421
7424 7422 instruct prefetchwT0( memory mem ) %{
7425 7423 predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
7426 7424 match(PrefetchWrite mem);
7427 7425 ins_cost(100);
7428 7426
7429 7427 format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
7430 7428 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7431 7429 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7432 7430 ins_pipe(ialu_mem);
7433 7431 %}
7434 7432
7435 7433 instruct prefetchwT2( memory mem ) %{
7436 7434 predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
7437 7435 match(PrefetchWrite mem);
7438 7436 ins_cost(100);
7439 7437
7440 7438 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
7441 7439 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7442 7440 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7443 7441 ins_pipe(ialu_mem);
7444 7442 %}
7445 7443
7446 7444 //----------Store Instructions-------------------------------------------------
7447 7445
7448 7446 // Store Byte
7449 7447 instruct storeB(memory mem, xRegI src) %{
7450 7448 match(Set mem (StoreB mem src));
7451 7449
7452 7450 ins_cost(125);
7453 7451 format %{ "MOV8 $mem,$src" %}
7454 7452 opcode(0x88);
7455 7453 ins_encode( OpcP, RegMem( src, mem ) );
7456 7454 ins_pipe( ialu_mem_reg );
7457 7455 %}
7458 7456
7459 7457 // Store Char/Short
7460 7458 instruct storeC(memory mem, eRegI src) %{
7461 7459 match(Set mem (StoreC mem src));
7462 7460
7463 7461 ins_cost(125);
7464 7462 format %{ "MOV16 $mem,$src" %}
7465 7463 opcode(0x89, 0x66);
7466 7464 ins_encode( OpcS, OpcP, RegMem( src, mem ) );
7467 7465 ins_pipe( ialu_mem_reg );
7468 7466 %}
7469 7467
7470 7468 // Store Integer
7471 7469 instruct storeI(memory mem, eRegI src) %{
7472 7470 match(Set mem (StoreI mem src));
7473 7471
7474 7472 ins_cost(125);
7475 7473 format %{ "MOV $mem,$src" %}
7476 7474 opcode(0x89);
7477 7475 ins_encode( OpcP, RegMem( src, mem ) );
7478 7476 ins_pipe( ialu_mem_reg );
7479 7477 %}
7480 7478
7481 7479 // Store Long
7482 7480 instruct storeL(long_memory mem, eRegL src) %{
7483 7481 predicate(!((StoreLNode*)n)->require_atomic_access());
7484 7482 match(Set mem (StoreL mem src));
7485 7483
7486 7484 ins_cost(200);
7487 7485 format %{ "MOV $mem,$src.lo\n\t"
7488 7486 "MOV $mem+4,$src.hi" %}
7489 7487 opcode(0x89, 0x89);
7490 7488 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
7491 7489 ins_pipe( ialu_mem_long_reg );
7492 7490 %}
7493 7491
7494 7492 // Store Long to Integer
7495 7493 instruct storeL2I(memory mem, eRegL src) %{
7496 7494 match(Set mem (StoreI mem (ConvL2I src)));
7497 7495
7498 7496 format %{ "MOV $mem,$src.lo\t# long -> int" %}
7499 7497 ins_encode %{
7500 7498 __ movl($mem$$Address, $src$$Register);
7501 7499 %}
7502 7500 ins_pipe(ialu_mem_reg);
7503 7501 %}
7504 7502
7505 7503 // Volatile Store Long. Must be atomic, so move it into
7506 7504 // the FP TOS and then do a 64-bit FIST. Has to probe the
7507 7505 // target address before the store (for null-ptr checks)
7508 7506 // so the memory operand is used twice in the encoding.
7509 7507 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7510 7508 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7511 7509 match(Set mem (StoreL mem src));
7512 7510 effect( KILL cr );
7513 7511 ins_cost(400);
7514 7512 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7515 7513 "FILD $src\n\t"
7516 7514 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7517 7515 opcode(0x3B);
7518 7516 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7519 7517 ins_pipe( fpu_reg_mem );
7520 7518 %}
7521 7519
7522 7520 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7523 7521 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7524 7522 match(Set mem (StoreL mem src));
7525 7523 effect( TEMP tmp, KILL cr );
7526 7524 ins_cost(380);
7527 7525 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7528 7526 "MOVSD $tmp,$src\n\t"
7529 7527 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7530 7528 opcode(0x3B);
7531 7529 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
7532 7530 ins_pipe( pipe_slow );
7533 7531 %}
7534 7532
7535 7533 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7536 7534 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7537 7535 match(Set mem (StoreL mem src));
7538 7536 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7539 7537 ins_cost(360);
7540 7538 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7541 7539 "MOVD $tmp,$src.lo\n\t"
7542 7540 "MOVD $tmp2,$src.hi\n\t"
7543 7541 "PUNPCKLDQ $tmp,$tmp2\n\t"
7544 7542 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7545 7543 opcode(0x3B);
7546 7544 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
7547 7545 ins_pipe( pipe_slow );
7548 7546 %}
7549 7547
7550 7548 // Store Pointer; for storing unknown oops and raw pointers
7551 7549 instruct storeP(memory mem, anyRegP src) %{
7552 7550 match(Set mem (StoreP mem src));
7553 7551
7554 7552 ins_cost(125);
7555 7553 format %{ "MOV $mem,$src" %}
7556 7554 opcode(0x89);
7557 7555 ins_encode( OpcP, RegMem( src, mem ) );
7558 7556 ins_pipe( ialu_mem_reg );
7559 7557 %}
7560 7558
7561 7559 // Store Integer Immediate
7562 7560 instruct storeImmI(memory mem, immI src) %{
7563 7561 match(Set mem (StoreI mem src));
7564 7562
7565 7563 ins_cost(150);
7566 7564 format %{ "MOV $mem,$src" %}
7567 7565 opcode(0xC7); /* C7 /0 */
7568 7566 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7569 7567 ins_pipe( ialu_mem_imm );
7570 7568 %}
7571 7569
7572 7570 // Store Short/Char Immediate
7573 7571 instruct storeImmI16(memory mem, immI16 src) %{
7574 7572 predicate(UseStoreImmI16);
7575 7573 match(Set mem (StoreC mem src));
7576 7574
7577 7575 ins_cost(150);
7578 7576 format %{ "MOV16 $mem,$src" %}
7579 7577 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7580 7578 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
7581 7579 ins_pipe( ialu_mem_imm );
7582 7580 %}
7583 7581
7584 7582 // Store Pointer Immediate; null pointers or constant oops that do not
7585 7583 // need card-mark barriers.
7586 7584 instruct storeImmP(memory mem, immP src) %{
7587 7585 match(Set mem (StoreP mem src));
7588 7586
7589 7587 ins_cost(150);
7590 7588 format %{ "MOV $mem,$src" %}
7591 7589 opcode(0xC7); /* C7 /0 */
7592 7590 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7593 7591 ins_pipe( ialu_mem_imm );
7594 7592 %}
7595 7593
7596 7594 // Store Byte Immediate
7597 7595 instruct storeImmB(memory mem, immI8 src) %{
7598 7596 match(Set mem (StoreB mem src));
7599 7597
7600 7598 ins_cost(150);
7601 7599 format %{ "MOV8 $mem,$src" %}
7602 7600 opcode(0xC6); /* C6 /0 */
7603 7601 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7604 7602 ins_pipe( ialu_mem_imm );
7605 7603 %}
7606 7604
7607 7605 // Store Aligned Packed Byte XMM register to memory
7608 7606 instruct storeA8B(memory mem, regXD src) %{
7609 7607 predicate(UseSSE>=1);
7610 7608 match(Set mem (Store8B mem src));
7611 7609 ins_cost(145);
7612 7610 format %{ "MOVQ $mem,$src\t! packed8B" %}
7613 7611 ins_encode( movq_st(mem, src));
7614 7612 ins_pipe( pipe_slow );
7615 7613 %}
7616 7614
7617 7615 // Store Aligned Packed Char/Short XMM register to memory
7618 7616 instruct storeA4C(memory mem, regXD src) %{
7619 7617 predicate(UseSSE>=1);
7620 7618 match(Set mem (Store4C mem src));
7621 7619 ins_cost(145);
7622 7620 format %{ "MOVQ $mem,$src\t! packed4C" %}
7623 7621 ins_encode( movq_st(mem, src));
7624 7622 ins_pipe( pipe_slow );
7625 7623 %}
7626 7624
7627 7625 // Store Aligned Packed Integer XMM register to memory
7628 7626 instruct storeA2I(memory mem, regXD src) %{
7629 7627 predicate(UseSSE>=1);
7630 7628 match(Set mem (Store2I mem src));
7631 7629 ins_cost(145);
7632 7630 format %{ "MOVQ $mem,$src\t! packed2I" %}
7633 7631 ins_encode( movq_st(mem, src));
7634 7632 ins_pipe( pipe_slow );
7635 7633 %}
7636 7634
7637 7635 // Store CMS card-mark Immediate
7638 7636 instruct storeImmCM(memory mem, immI8 src) %{
7639 7637 match(Set mem (StoreCM mem src));
7640 7638
7641 7639 ins_cost(150);
7642 7640 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7643 7641 opcode(0xC6); /* C6 /0 */
7644 7642 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7645 7643 ins_pipe( ialu_mem_imm );
7646 7644 %}
7647 7645
7648 7646 // Store Double
7649 7647 instruct storeD( memory mem, regDPR1 src) %{
7650 7648 predicate(UseSSE<=1);
7651 7649 match(Set mem (StoreD mem src));
7652 7650
7653 7651 ins_cost(100);
7654 7652 format %{ "FST_D $mem,$src" %}
7655 7653 opcode(0xDD); /* DD /2 */
7656 7654 ins_encode( enc_FP_store(mem,src) );
7657 7655 ins_pipe( fpu_mem_reg );
7658 7656 %}
7659 7657
7660 7658 // Store double does rounding on x86
7661 7659 instruct storeD_rounded( memory mem, regDPR1 src) %{
7662 7660 predicate(UseSSE<=1);
7663 7661 match(Set mem (StoreD mem (RoundDouble src)));
7664 7662
7665 7663 ins_cost(100);
7666 7664 format %{ "FST_D $mem,$src\t# round" %}
7667 7665 opcode(0xDD); /* DD /2 */
7668 7666 ins_encode( enc_FP_store(mem,src) );
7669 7667 ins_pipe( fpu_mem_reg );
7670 7668 %}
7671 7669
7672 7670 // Store XMM register to memory (double-precision floating points)
7673 7671 // MOVSD instruction
7674 7672 instruct storeXD(memory mem, regXD src) %{
7675 7673 predicate(UseSSE>=2);
7676 7674 match(Set mem (StoreD mem src));
7677 7675 ins_cost(95);
7678 7676 format %{ "MOVSD $mem,$src" %}
7679 7677 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7680 7678 ins_pipe( pipe_slow );
7681 7679 %}
7682 7680
7683 7681 // Store XMM register to memory (single-precision floating point)
7684 7682 // MOVSS instruction
7685 7683 instruct storeX(memory mem, regX src) %{
7686 7684 predicate(UseSSE>=1);
7687 7685 match(Set mem (StoreF mem src));
7688 7686 ins_cost(95);
7689 7687 format %{ "MOVSS $mem,$src" %}
7690 7688 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7691 7689 ins_pipe( pipe_slow );
7692 7690 %}
7693 7691
7694 7692 // Store Aligned Packed Single Float XMM register to memory
7695 7693 instruct storeA2F(memory mem, regXD src) %{
7696 7694 predicate(UseSSE>=1);
7697 7695 match(Set mem (Store2F mem src));
7698 7696 ins_cost(145);
7699 7697 format %{ "MOVQ $mem,$src\t! packed2F" %}
7700 7698 ins_encode( movq_st(mem, src));
7701 7699 ins_pipe( pipe_slow );
7702 7700 %}
7703 7701
7704 7702 // Store Float
7705 7703 instruct storeF( memory mem, regFPR1 src) %{
7706 7704 predicate(UseSSE==0);
7707 7705 match(Set mem (StoreF mem src));
7708 7706
7709 7707 ins_cost(100);
7710 7708 format %{ "FST_S $mem,$src" %}
7711 7709 opcode(0xD9); /* D9 /2 */
7712 7710 ins_encode( enc_FP_store(mem,src) );
7713 7711 ins_pipe( fpu_mem_reg );
7714 7712 %}
7715 7713
7716 7714 // Store Float does rounding on x86
7717 7715 instruct storeF_rounded( memory mem, regFPR1 src) %{
7718 7716 predicate(UseSSE==0);
7719 7717 match(Set mem (StoreF mem (RoundFloat src)));
7720 7718
7721 7719 ins_cost(100);
7722 7720 format %{ "FST_S $mem,$src\t# round" %}
7723 7721 opcode(0xD9); /* D9 /2 */
7724 7722 ins_encode( enc_FP_store(mem,src) );
7725 7723 ins_pipe( fpu_mem_reg );
7726 7724 %}
7727 7725
7728 7726 // Store Float does rounding on x86
7729 7727 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7730 7728 predicate(UseSSE<=1);
7731 7729 match(Set mem (StoreF mem (ConvD2F src)));
7732 7730
7733 7731 ins_cost(100);
7734 7732 format %{ "FST_S $mem,$src\t# D-round" %}
7735 7733 opcode(0xD9); /* D9 /2 */
7736 7734 ins_encode( enc_FP_store(mem,src) );
7737 7735 ins_pipe( fpu_mem_reg );
7738 7736 %}
7739 7737
7740 7738 // Store immediate Float value (it is faster than store from FPU register)
7741 7739 // The instruction usage is guarded by predicate in operand immF().
7742 7740 instruct storeF_imm( memory mem, immF src) %{
7743 7741 match(Set mem (StoreF mem src));
7744 7742
7745 7743 ins_cost(50);
7746 7744 format %{ "MOV $mem,$src\t# store float" %}
7747 7745 opcode(0xC7); /* C7 /0 */
7748 7746 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7749 7747 ins_pipe( ialu_mem_imm );
7750 7748 %}
7751 7749
7752 7750 // Store immediate Float value (it is faster than store from XMM register)
7753 7751 // The instruction usage is guarded by predicate in operand immXF().
7754 7752 instruct storeX_imm( memory mem, immXF src) %{
7755 7753 match(Set mem (StoreF mem src));
7756 7754
7757 7755 ins_cost(50);
7758 7756 format %{ "MOV $mem,$src\t# store float" %}
7759 7757 opcode(0xC7); /* C7 /0 */
7760 7758 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7761 7759 ins_pipe( ialu_mem_imm );
7762 7760 %}
7763 7761
7764 7762 // Store Integer to stack slot
7765 7763 instruct storeSSI(stackSlotI dst, eRegI src) %{
7766 7764 match(Set dst src);
7767 7765
7768 7766 ins_cost(100);
7769 7767 format %{ "MOV $dst,$src" %}
7770 7768 opcode(0x89);
7771 7769 ins_encode( OpcPRegSS( dst, src ) );
7772 7770 ins_pipe( ialu_mem_reg );
7773 7771 %}
7774 7772
7775 7773 // Store Integer to stack slot
7776 7774 instruct storeSSP(stackSlotP dst, eRegP src) %{
7777 7775 match(Set dst src);
7778 7776
7779 7777 ins_cost(100);
7780 7778 format %{ "MOV $dst,$src" %}
7781 7779 opcode(0x89);
7782 7780 ins_encode( OpcPRegSS( dst, src ) );
7783 7781 ins_pipe( ialu_mem_reg );
7784 7782 %}
7785 7783
7786 7784 // Store Long to stack slot
7787 7785 instruct storeSSL(stackSlotL dst, eRegL src) %{
7788 7786 match(Set dst src);
7789 7787
7790 7788 ins_cost(200);
7791 7789 format %{ "MOV $dst,$src.lo\n\t"
7792 7790 "MOV $dst+4,$src.hi" %}
7793 7791 opcode(0x89, 0x89);
7794 7792 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7795 7793 ins_pipe( ialu_mem_long_reg );
7796 7794 %}
7797 7795
7798 7796 //----------MemBar Instructions-----------------------------------------------
7799 7797 // Memory barrier flavors
7800 7798
7801 7799 instruct membar_acquire() %{
7802 7800 match(MemBarAcquire);
7803 7801 ins_cost(400);
7804 7802
7805 7803 size(0);
7806 7804 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7807 7805 ins_encode();
7808 7806 ins_pipe(empty);
7809 7807 %}
7810 7808
7811 7809 instruct membar_acquire_lock() %{
7812 7810 match(MemBarAcquire);
7813 7811 predicate(Matcher::prior_fast_lock(n));
7814 7812 ins_cost(0);
7815 7813
7816 7814 size(0);
7817 7815 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7818 7816 ins_encode( );
7819 7817 ins_pipe(empty);
7820 7818 %}
7821 7819
7822 7820 instruct membar_release() %{
7823 7821 match(MemBarRelease);
7824 7822 ins_cost(400);
7825 7823
7826 7824 size(0);
7827 7825 format %{ "MEMBAR-release ! (empty encoding)" %}
7828 7826 ins_encode( );
7829 7827 ins_pipe(empty);
7830 7828 %}
7831 7829
7832 7830 instruct membar_release_lock() %{
7833 7831 match(MemBarRelease);
7834 7832 predicate(Matcher::post_fast_unlock(n));
7835 7833 ins_cost(0);
7836 7834
7837 7835 size(0);
7838 7836 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7839 7837 ins_encode( );
7840 7838 ins_pipe(empty);
7841 7839 %}
7842 7840
7843 7841 instruct membar_volatile(eFlagsReg cr) %{
7844 7842 match(MemBarVolatile);
7845 7843 effect(KILL cr);
7846 7844 ins_cost(400);
7847 7845
7848 7846 format %{
7849 7847 $$template
7850 7848 if (os::is_MP()) {
7851 7849 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7852 7850 } else {
7853 7851 $$emit$$"MEMBAR-volatile ! (empty encoding)"
7854 7852 }
7855 7853 %}
7856 7854 ins_encode %{
7857 7855 __ membar(Assembler::StoreLoad);
7858 7856 %}
7859 7857 ins_pipe(pipe_slow);
7860 7858 %}
7861 7859
7862 7860 instruct unnecessary_membar_volatile() %{
7863 7861 match(MemBarVolatile);
7864 7862 predicate(Matcher::post_store_load_barrier(n));
7865 7863 ins_cost(0);
7866 7864
7867 7865 size(0);
7868 7866 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7869 7867 ins_encode( );
7870 7868 ins_pipe(empty);
7871 7869 %}
7872 7870
7873 7871 //----------Move Instructions--------------------------------------------------
7874 7872 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7875 7873 match(Set dst (CastX2P src));
7876 7874 format %{ "# X2P $dst, $src" %}
7877 7875 ins_encode( /*empty encoding*/ );
7878 7876 ins_cost(0);
7879 7877 ins_pipe(empty);
7880 7878 %}
7881 7879
7882 7880 instruct castP2X(eRegI dst, eRegP src ) %{
7883 7881 match(Set dst (CastP2X src));
7884 7882 ins_cost(50);
7885 7883 format %{ "MOV $dst, $src\t# CastP2X" %}
7886 7884 ins_encode( enc_Copy( dst, src) );
7887 7885 ins_pipe( ialu_reg_reg );
7888 7886 %}
7889 7887
7890 7888 //----------Conditional Move---------------------------------------------------
7891 7889 // Conditional move
7892 7890 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7893 7891 predicate(VM_Version::supports_cmov() );
7894 7892 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7895 7893 ins_cost(200);
7896 7894 format %{ "CMOV$cop $dst,$src" %}
7897 7895 opcode(0x0F,0x40);
7898 7896 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7899 7897 ins_pipe( pipe_cmov_reg );
7900 7898 %}
7901 7899
7902 7900 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7903 7901 predicate(VM_Version::supports_cmov() );
7904 7902 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7905 7903 ins_cost(200);
7906 7904 format %{ "CMOV$cop $dst,$src" %}
7907 7905 opcode(0x0F,0x40);
7908 7906 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7909 7907 ins_pipe( pipe_cmov_reg );
7910 7908 %}
7911 7909
7912 7910 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7913 7911 predicate(VM_Version::supports_cmov() );
7914 7912 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7915 7913 ins_cost(200);
7916 7914 expand %{
7917 7915 cmovI_regU(cop, cr, dst, src);
7918 7916 %}
7919 7917 %}
7920 7918
7921 7919 // Conditional move
7922 7920 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7923 7921 predicate(VM_Version::supports_cmov() );
7924 7922 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7925 7923 ins_cost(250);
7926 7924 format %{ "CMOV$cop $dst,$src" %}
7927 7925 opcode(0x0F,0x40);
7928 7926 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7929 7927 ins_pipe( pipe_cmov_mem );
7930 7928 %}
7931 7929
7932 7930 // Conditional move
7933 7931 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7934 7932 predicate(VM_Version::supports_cmov() );
7935 7933 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7936 7934 ins_cost(250);
7937 7935 format %{ "CMOV$cop $dst,$src" %}
7938 7936 opcode(0x0F,0x40);
7939 7937 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7940 7938 ins_pipe( pipe_cmov_mem );
7941 7939 %}
7942 7940
7943 7941 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7944 7942 predicate(VM_Version::supports_cmov() );
7945 7943 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7946 7944 ins_cost(250);
7947 7945 expand %{
7948 7946 cmovI_memU(cop, cr, dst, src);
7949 7947 %}
7950 7948 %}
7951 7949
7952 7950 // Conditional move
7953 7951 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7954 7952 predicate(VM_Version::supports_cmov() );
7955 7953 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7956 7954 ins_cost(200);
7957 7955 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7958 7956 opcode(0x0F,0x40);
7959 7957 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7960 7958 ins_pipe( pipe_cmov_reg );
7961 7959 %}
7962 7960
7963 7961 // Conditional move (non-P6 version)
7964 7962 // Note: a CMoveP is generated for stubs and native wrappers
7965 7963 // regardless of whether we are on a P6, so we
7966 7964 // emulate a cmov here
7967 7965 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7968 7966 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7969 7967 ins_cost(300);
7970 7968 format %{ "Jn$cop skip\n\t"
7971 7969 "MOV $dst,$src\t# pointer\n"
7972 7970 "skip:" %}
7973 7971 opcode(0x8b);
7974 7972 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7975 7973 ins_pipe( pipe_cmov_reg );
7976 7974 %}
7977 7975
7978 7976 // Conditional move
7979 7977 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7980 7978 predicate(VM_Version::supports_cmov() );
7981 7979 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7982 7980 ins_cost(200);
7983 7981 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7984 7982 opcode(0x0F,0x40);
7985 7983 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7986 7984 ins_pipe( pipe_cmov_reg );
7987 7985 %}
7988 7986
7989 7987 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7990 7988 predicate(VM_Version::supports_cmov() );
7991 7989 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7992 7990 ins_cost(200);
7993 7991 expand %{
7994 7992 cmovP_regU(cop, cr, dst, src);
7995 7993 %}
7996 7994 %}
7997 7995
7998 7996 // DISABLED: Requires the ADLC to emit a bottom_type call that
7999 7997 // correctly meets the two pointer arguments; one is an incoming
8000 7998 // register but the other is a memory operand. ALSO appears to
8001 7999 // be buggy with implicit null checks.
8002 8000 //
8003 8001 //// Conditional move
8004 8002 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
8005 8003 // predicate(VM_Version::supports_cmov() );
8006 8004 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8007 8005 // ins_cost(250);
8008 8006 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
8009 8007 // opcode(0x0F,0x40);
8010 8008 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8011 8009 // ins_pipe( pipe_cmov_mem );
8012 8010 //%}
8013 8011 //
8014 8012 //// Conditional move
8015 8013 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
8016 8014 // predicate(VM_Version::supports_cmov() );
8017 8015 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8018 8016 // ins_cost(250);
8019 8017 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
8020 8018 // opcode(0x0F,0x40);
8021 8019 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8022 8020 // ins_pipe( pipe_cmov_mem );
8023 8021 //%}
8024 8022
8025 8023 // Conditional move
8026 8024 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
8027 8025 predicate(UseSSE<=1);
8028 8026 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8029 8027 ins_cost(200);
8030 8028 format %{ "FCMOV$cop $dst,$src\t# double" %}
8031 8029 opcode(0xDA);
8032 8030 ins_encode( enc_cmov_d(cop,src) );
8033 8031 ins_pipe( pipe_cmovD_reg );
8034 8032 %}
8035 8033
8036 8034 // Conditional move
8037 8035 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
8038 8036 predicate(UseSSE==0);
8039 8037 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8040 8038 ins_cost(200);
8041 8039 format %{ "FCMOV$cop $dst,$src\t# float" %}
8042 8040 opcode(0xDA);
8043 8041 ins_encode( enc_cmov_d(cop,src) );
8044 8042 ins_pipe( pipe_cmovD_reg );
8045 8043 %}
8046 8044
8047 8045 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8048 8046 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
8049 8047 predicate(UseSSE<=1);
8050 8048 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8051 8049 ins_cost(200);
8052 8050 format %{ "Jn$cop skip\n\t"
8053 8051 "MOV $dst,$src\t# double\n"
8054 8052 "skip:" %}
8055 8053 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8056 8054 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
8057 8055 ins_pipe( pipe_cmovD_reg );
8058 8056 %}
8059 8057
8060 8058 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8061 8059 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
8062 8060 predicate(UseSSE==0);
8063 8061 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8064 8062 ins_cost(200);
8065 8063 format %{ "Jn$cop skip\n\t"
8066 8064 "MOV $dst,$src\t# float\n"
8067 8065 "skip:" %}
8068 8066 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8069 8067 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
8070 8068 ins_pipe( pipe_cmovD_reg );
8071 8069 %}
8072 8070
8073 8071 // No CMOVE with SSE/SSE2
8074 8072 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
8075 8073 predicate (UseSSE>=1);
8076 8074 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8077 8075 ins_cost(200);
8078 8076 format %{ "Jn$cop skip\n\t"
8079 8077 "MOVSS $dst,$src\t# float\n"
8080 8078 "skip:" %}
8081 8079 ins_encode %{
8082 8080 Label skip;
8083 8081 // Invert sense of branch from sense of CMOV
8084 8082 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8085 8083 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8086 8084 __ bind(skip);
8087 8085 %}
8088 8086 ins_pipe( pipe_slow );
8089 8087 %}
8090 8088
8091 8089 // No CMOVE with SSE/SSE2
8092 8090 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
8093 8091 predicate (UseSSE>=2);
8094 8092 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8095 8093 ins_cost(200);
8096 8094 format %{ "Jn$cop skip\n\t"
8097 8095 "MOVSD $dst,$src\t# float\n"
8098 8096 "skip:" %}
8099 8097 ins_encode %{
8100 8098 Label skip;
8101 8099 // Invert sense of branch from sense of CMOV
8102 8100 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8103 8101 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8104 8102 __ bind(skip);
8105 8103 %}
8106 8104 ins_pipe( pipe_slow );
8107 8105 %}
8108 8106
8109 8107 // unsigned version
// fcmovX_regU: as fcmovX_regS but for unsigned compare conditions
// (cmpOpU / eFlagsRegU).
8110 8108 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
8111 8109 predicate (UseSSE>=1);
8112 8110 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8113 8111 ins_cost(200);
8114 8112 format %{ "Jn$cop skip\n\t"
8115 8113 "MOVSS $dst,$src\t# float\n"
8116 8114 "skip:" %}
8117 8115 ins_encode %{
8118 8116 Label skip;
8119 8117 // Invert sense of branch from sense of CMOV
8120 8118 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8121 8119 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8122 8120 __ bind(skip);
8123 8121 %}
8124 8122 ins_pipe( pipe_slow );
8125 8123 %}
8126 8124 
// fcmovX_regUCF: carry-flag-only unsigned variant; simply expands to
// fcmovX_regU with the same operands.
8127 8125 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
8128 8126 predicate (UseSSE>=1);
8129 8127 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8130 8128 ins_cost(200);
8131 8129 expand %{
8132 8130 fcmovX_regU(cop, cr, dst, src);
8133 8131 %}
8134 8132 %}
8135 8133
8136 8134 // unsigned version
// fcmovXD_regU: conditional move of a double between XMM registers for
// unsigned compare conditions, synthesized as an inverted-condition
// short branch around MOVSD.
8137 8135 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
8138 8136 predicate (UseSSE>=2);
8139 8137 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8140 8138 ins_cost(200);
8141 8139 format %{ "Jn$cop skip\n\t"
8142 8140 "MOVSD $dst,$src\t# double\n"
8143 8141 "skip:" %}
8144 8142 ins_encode %{
8145 8143 Label skip;
8146 8144 // Invert sense of branch from sense of CMOV
8147 8145 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8148 8146 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8149 8147 __ bind(skip);
8150 8148 %}
8151 8149 ins_pipe( pipe_slow );
8152 8150 %}
8153 8151
// fcmovXD_regUCF: carry-flag-only unsigned double variant; expands to
// fcmovXD_regU.
8154 8152 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8155 8153 predicate (UseSSE>=2);
8156 8154 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8157 8155 ins_cost(200);
8158 8156 expand %{
8159 8157 fcmovXD_regU(cop, cr, dst, src);
8160 8158 %}
8161 8159 %}
8162 8160 
// cmovL_reg: 64-bit conditional move on 32-bit x86 — two CMOVcc
// instructions, one for each half of the long register pair.
// Requires hardware CMOV support.
8163 8161 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8164 8162 predicate(VM_Version::supports_cmov() );
8165 8163 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8166 8164 ins_cost(200);
8167 8165 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8168 8166 "CMOV$cop $dst.hi,$src.hi" %}
8169 8167 opcode(0x0F,0x40);
8170 8168 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8171 8169 ins_pipe( pipe_cmov_reg_long );
8172 8170 %}
8173 8171 
// cmovL_regU: same as cmovL_reg, for unsigned compare conditions.
8174 8172 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8175 8173 predicate(VM_Version::supports_cmov() );
8176 8174 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8177 8175 ins_cost(200);
8178 8176 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8179 8177 "CMOV$cop $dst.hi,$src.hi" %}
8180 8178 opcode(0x0F,0x40);
8181 8179 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8182 8180 ins_pipe( pipe_cmov_reg_long );
8183 8181 %}
8184 8182 
// cmovL_regUCF: carry-flag-only variant; expands to cmovL_regU.
8185 8183 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8186 8184 predicate(VM_Version::supports_cmov() );
8187 8185 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8188 8186 ins_cost(200);
8189 8187 expand %{
8190 8188 cmovL_regU(cop, cr, dst, src);
8191 8189 %}
8192 8190 %}
8193 8191
8194 8192 //----------Arithmetic Instructions--------------------------------------------
8195 8193 //----------Addition Instructions----------------------------------------------
8196 8194 // Integer Addition Instructions
// addI_eReg: 32-bit register+register ADD; clobbers condition flags.
8197 8195 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8198 8196 match(Set dst (AddI dst src));
8199 8197 effect(KILL cr);
8200 8198 
8201 8199 size(2);
8202 8200 format %{ "ADD $dst,$src" %}
8203 8201 opcode(0x03);
8204 8202 ins_encode( OpcP, RegReg( dst, src) );
8205 8203 ins_pipe( ialu_reg_reg );
8206 8204 %}
8207 8205 
// addI_eReg_imm: ADD register, immediate (8- or 32-bit immediate form
// chosen by the OpcSErm/Con8or32 encoders).
8208 8206 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8209 8207 match(Set dst (AddI dst src));
8210 8208 effect(KILL cr);
8211 8209 
8212 8210 format %{ "ADD $dst,$src" %}
8213 8211 opcode(0x81, 0x00); /* /0 id */
8214 8212 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215 8213 ins_pipe( ialu_reg );
8216 8214 %}
8217 8215 
// incI_eReg: add-one matched to the one-byte INC encoding (0x40+reg),
// guarded by UseIncDec.
8218 8216 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8219 8217 predicate(UseIncDec);
8220 8218 match(Set dst (AddI dst src));
8221 8219 effect(KILL cr);
8222 8220 
8223 8221 size(1);
8224 8222 format %{ "INC $dst" %}
8225 8223 opcode(0x40); /* */
8226 8224 ins_encode( Opc_plus( primary, dst ) );
8227 8225 ins_pipe( ialu_reg );
8228 8226 %}
8229 8227 
// leaI_eReg_immI: three-operand add via LEA — does not clobber flags.
8230 8228 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8231 8229 match(Set dst (AddI src0 src1));
8232 8230 ins_cost(110);
8233 8231 
8234 8232 format %{ "LEA $dst,[$src0 + $src1]" %}
8235 8233 opcode(0x8D); /* 0x8D /r */
8236 8234 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8237 8235 ins_pipe( ialu_reg_reg );
8238 8236 %}
8239 8237 
// leaP_eReg_immI: pointer version of the LEA add.
8240 8238 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8241 8239 match(Set dst (AddP src0 src1));
8242 8240 ins_cost(110);
8243 8241 
8244 8242 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8245 8243 opcode(0x8D); /* 0x8D /r */
8246 8244 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8247 8245 ins_pipe( ialu_reg_reg );
8248 8246 %}
8249 8247 
// decI_eReg: add of -1 matched to the one-byte DEC encoding (0x48+reg).
8250 8248 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8251 8249 predicate(UseIncDec);
8252 8250 match(Set dst (AddI dst src));
8253 8251 effect(KILL cr);
8254 8252 
8255 8253 size(1);
8256 8254 format %{ "DEC $dst" %}
8257 8255 opcode(0x48); /* */
8258 8256 ins_encode( Opc_plus( primary, dst ) );
8259 8257 ins_pipe( ialu_reg );
8260 8258 %}
8261 8259
// addP_eReg: pointer += int register; clobbers flags.
8262 8260 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8263 8261 match(Set dst (AddP dst src));
8264 8262 effect(KILL cr);
8265 8263 
8266 8264 size(2);
8267 8265 format %{ "ADD $dst,$src" %}
8268 8266 opcode(0x03);
8269 8267 ins_encode( OpcP, RegReg( dst, src) );
8270 8268 ins_pipe( ialu_reg_reg );
8271 8269 %}
8272 8270 
// addP_eReg_imm: pointer += immediate (8/32-bit immediate encoding).
8273 8271 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8274 8272 match(Set dst (AddP dst src));
8275 8273 effect(KILL cr);
8276 8274 
8277 8275 format %{ "ADD $dst,$src" %}
8278 8276 opcode(0x81,0x00); /* Opcode 81 /0 id */
8279 8277 // ins_encode( RegImm( dst, src) );
8280 8278 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8281 8279 ins_pipe( ialu_reg );
8282 8280 %}
8283 8281 
// addI_eReg_mem: register += memory operand.
8284 8282 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8285 8283 match(Set dst (AddI dst (LoadI src)));
8286 8284 effect(KILL cr);
8287 8285 
8288 8286 ins_cost(125);
8289 8287 format %{ "ADD $dst,$src" %}
8290 8288 opcode(0x03);
8291 8289 ins_encode( OpcP, RegMem( dst, src) );
8292 8290 ins_pipe( ialu_reg_mem );
8293 8291 %}
8294 8292 
// addI_mem_eReg: read-modify-write add of a register into memory.
8295 8293 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8296 8294 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8297 8295 effect(KILL cr);
8298 8296 
8299 8297 ins_cost(150);
8300 8298 format %{ "ADD $dst,$src" %}
8301 8299 opcode(0x01); /* Opcode 01 /r */
8302 8300 ins_encode( OpcP, RegMem( src, dst ) );
8303 8301 ins_pipe( ialu_mem_reg );
8304 8302 %}
8305 8303 
8306 8304 // Add Memory with Immediate
8307 8305 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8308 8306 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8309 8307 effect(KILL cr);
8310 8308 
8311 8309 ins_cost(125);
8312 8310 format %{ "ADD $dst,$src" %}
8313 8311 opcode(0x81); /* Opcode 81 /0 id */
8314 8312 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8315 8313 ins_pipe( ialu_mem_imm );
8316 8314 %}
8317 8315 
// incI_mem: memory add-one via INC m32 (FF /0).
8318 8316 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8319 8317 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8320 8318 effect(KILL cr);
8321 8319 
8322 8320 ins_cost(125);
8323 8321 format %{ "INC $dst" %}
8324 8322 opcode(0xFF); /* Opcode FF /0 */
8325 8323 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8326 8324 ins_pipe( ialu_mem_imm );
8327 8325 %}
8328 8326 
// decI_mem: memory add of -1 via DEC m32 (FF /1).
8329 8327 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8330 8328 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8331 8329 effect(KILL cr);
8332 8330 
8333 8331 ins_cost(125);
8334 8332 format %{ "DEC $dst" %}
8335 8333 opcode(0xFF); /* Opcode FF /1 */
8336 8334 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8337 8335 ins_pipe( ialu_mem_imm );
8338 8336 %}
8339 8337
8340 8338
// checkCastPP / castPP / castII: compiler-only cast nodes; they emit no
// machine code (empty encodings) and exist to carry type information.
8341 8339 instruct checkCastPP( eRegP dst ) %{
8342 8340 match(Set dst (CheckCastPP dst));
8343 8341 
8344 8342 size(0);
8345 8343 format %{ "#checkcastPP of $dst" %}
8346 8344 ins_encode( /*empty encoding*/ );
8347 8345 ins_pipe( empty );
8348 8346 %}
8349 8347 
8350 8348 instruct castPP( eRegP dst ) %{
8351 8349 match(Set dst (CastPP dst));
8352 8350 format %{ "#castPP of $dst" %}
8353 8351 ins_encode( /*empty encoding*/ );
8354 8352 ins_pipe( empty );
8355 8353 %}
8356 8354 
8357 8355 instruct castII( eRegI dst ) %{
8358 8356 match(Set dst (CastII dst));
8359 8357 format %{ "#castII of $dst" %}
8360 8358 ins_encode( /*empty encoding*/ );
8361 8359 ins_cost(0);
8362 8360 ins_pipe( empty );
8363 8361 %}
8364 8362 
8365 8363 
8366 8364 // Load-locked - same as a regular pointer load when used with compare-swap
8367 8365 instruct loadPLocked(eRegP dst, memory mem) %{
8368 8366 match(Set dst (LoadPLocked mem));
8369 8367 
8370 8368 ins_cost(125);
8371 8369 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
8372 8370 opcode(0x8B);
8373 8371 ins_encode( OpcP, RegMem(dst,mem));
8374 8372 ins_pipe( ialu_reg_mem );
8375 8373 %}
8376 8374 
8377 8375 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 path (UseSSE<=1): FILD/FISTP gives a single atomic 64-bit access.
8378 8376 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8379 8377 predicate(UseSSE<=1);
8380 8378 match(Set dst (LoadLLocked mem));
8381 8379 
8382 8380 ins_cost(200);
8383 8381 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
8384 8382 "FISTp $dst" %}
8385 8383 ins_encode(enc_loadL_volatile(mem,dst));
8386 8384 ins_pipe( fpu_reg_mem );
8387 8385 %}
8388 8386 
// SSE2 path: a single MOVSD through an XMM temp, result to a stack slot.
8389 8387 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8390 8388 predicate(UseSSE>=2);
8391 8389 match(Set dst (LoadLLocked mem));
8392 8390 effect(TEMP tmp);
8393 8391 ins_cost(180);
8394 8392 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8395 8393 "MOVSD $dst,$tmp" %}
8396 8394 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8397 8395 ins_pipe( pipe_slow );
8398 8396 %}
8399 8397 
// SSE2 path to a GPR pair: MOVSD then split halves out via MOVD/PSRLQ.
8400 8398 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8401 8399 predicate(UseSSE>=2);
8402 8400 match(Set dst (LoadLLocked mem));
8403 8401 effect(TEMP tmp);
8404 8402 ins_cost(160);
8405 8403 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8406 8404 "MOVD $dst.lo,$tmp\n\t"
8407 8405 "PSRLQ $tmp,32\n\t"
8408 8406 "MOVD $dst.hi,$tmp" %}
8409 8407 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8410 8408 ins_pipe( pipe_slow );
8411 8409 %}
8412 8410
8413 8411 // Conditional-store of the updated heap-top.
8414 8412 // Used during allocation of the shared heap.
8415 8413 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
8416 8414 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
8417 8415 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8418 8416 // EAX is killed if there is contention, but then it's also unused.
8419 8417 // In the common case of no contention, EAX holds the new oop address.
8420 8418 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
8421 8419 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
8422 8420 ins_pipe( pipe_cmpxchg );
8423 8421 %}
8424 8422 
8425 8423 // Conditional-store of an int value.
8426 8424 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8427 8425 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8428 8426 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8429 8427 effect(KILL oldval);
8430 8428 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8431 8429 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8432 8430 ins_pipe( pipe_cmpxchg );
8433 8431 %}
8434 8432 
8435 8433 // Conditional-store of a long value.
8436 8434 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
8437 8435 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8438 8436 match(Set cr (StoreLConditional mem (Binary oldval newval)));
8439 8437 effect(KILL oldval);
8440 8438 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8441 8439 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8442 8440 "XCHG EBX,ECX"
8443 8441 %}
8444 8442 ins_encode %{
8445 8443 // Note: we need to swap rbx, and rcx before and after the
8446 8444 // cmpxchg8 instruction because the instruction uses
8447 8445 // rcx as the high order word of the new value to store but
8448 8446 // our register encoding uses rbx.
8449 8447 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8450 8448 if( os::is_MP() )
8451 8449 __ lock();
8452 8450 __ cmpxchg8($mem$$Address);
8453 8451 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8454 8452 %}
8455 8453 ins_pipe( pipe_cmpxchg );
8456 8454 %}
8457 8455 
8458 8456 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8459 8457 
// compareAndSwapL: 64-bit CAS via CMPXCHG8B; res is set from ZF
// (1 on success, 0 on failure) by enc_flags_ne_to_boolean.
8460 8458 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8461 8459 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8462 8460 effect(KILL cr, KILL oldval);
8463 8461 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8464 8462 "MOV $res,0\n\t"
8465 8463 "JNE,s fail\n\t"
8466 8464 "MOV $res,1\n"
8467 8465 "fail:" %}
8468 8466 ins_encode( enc_cmpxchg8(mem_ptr),
8469 8467 enc_flags_ne_to_boolean(res) );
8470 8468 ins_pipe( pipe_cmpxchg );
8471 8469 %}
8472 8470 
// compareAndSwapP: pointer CAS via CMPXCHG, expected value in EAX.
8473 8471 instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
8474 8472 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8475 8473 effect(KILL cr, KILL oldval);
8476 8474 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8477 8475 "MOV $res,0\n\t"
8478 8476 "JNE,s fail\n\t"
8479 8477 "MOV $res,1\n"
8480 8478 "fail:" %}
8481 8479 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8482 8480 ins_pipe( pipe_cmpxchg );
8483 8481 %}
8484 8482 
// compareAndSwapI: 32-bit int CAS, same shape as compareAndSwapP.
8485 8483 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
8486 8484 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8487 8485 effect(KILL cr, KILL oldval);
8488 8486 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8489 8487 "MOV $res,0\n\t"
8490 8488 "JNE,s fail\n\t"
8491 8489 "MOV $res,1\n"
8492 8490 "fail:" %}
8493 8491 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8494 8492 ins_pipe( pipe_cmpxchg );
8495 8493 %}
8496 8494
8497 8495 //----------Subtraction Instructions-------------------------------------------
8498 8496 // Integer Subtraction Instructions
// subI_eReg: 32-bit register-register SUB; clobbers flags.
8499 8497 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8500 8498 match(Set dst (SubI dst src));
8501 8499 effect(KILL cr);
8502 8500 
8503 8501 size(2);
8504 8502 format %{ "SUB $dst,$src" %}
8505 8503 opcode(0x2B);
8506 8504 ins_encode( OpcP, RegReg( dst, src) );
8507 8505 ins_pipe( ialu_reg_reg );
8508 8506 %}
8509 8507 
// subI_eReg_imm: SUB register, immediate (81 /5, 8/32-bit form).
8510 8508 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8511 8509 match(Set dst (SubI dst src));
8512 8510 effect(KILL cr);
8513 8511 
8514 8512 format %{ "SUB $dst,$src" %}
8515 8513 opcode(0x81,0x05); /* Opcode 81 /5 */
8516 8514 // ins_encode( RegImm( dst, src) );
8517 8515 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8518 8516 ins_pipe( ialu_reg );
8519 8517 %}
8520 8518 
// subI_eReg_mem: register -= memory operand.
8521 8519 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8522 8520 match(Set dst (SubI dst (LoadI src)));
8523 8521 effect(KILL cr);
8524 8522 
8525 8523 ins_cost(125);
8526 8524 format %{ "SUB $dst,$src" %}
8527 8525 opcode(0x2B);
8528 8526 ins_encode( OpcP, RegMem( dst, src) );
8529 8527 ins_pipe( ialu_reg_mem );
8530 8528 %}
8531 8529 
// subI_mem_eReg: read-modify-write subtract of a register from memory.
8532 8530 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8533 8531 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8534 8532 effect(KILL cr);
8535 8533 
8536 8534 ins_cost(150);
8537 8535 format %{ "SUB $dst,$src" %}
8538 8536 opcode(0x29); /* Opcode 29 /r */
8539 8537 ins_encode( OpcP, RegMem( src, dst ) );
8540 8538 ins_pipe( ialu_mem_reg );
8541 8539 %}
8542 8540 
8543 8541 // Subtract from a pointer
// subP_eReg: matches ptr + (0 - src) and emits a plain SUB.
8544 8542 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8545 8543 match(Set dst (AddP dst (SubI zero src)));
8546 8544 effect(KILL cr);
8547 8545 
8548 8546 size(2);
8549 8547 format %{ "SUB $dst,$src" %}
8550 8548 opcode(0x2B);
8551 8549 ins_encode( OpcP, RegReg( dst, src) );
8552 8550 ins_pipe( ialu_reg_reg );
8553 8551 %}
8554 8552 
// negI_eReg: matches 0 - dst and emits NEG (F7 /3).
8555 8553 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
8556 8554 match(Set dst (SubI zero dst));
8557 8555 effect(KILL cr);
8558 8556 
8559 8557 size(2);
8560 8558 format %{ "NEG $dst" %}
8561 8559 opcode(0xF7,0x03); // Opcode F7 /3
8562 8560 ins_encode( OpcP, RegOpc( dst ) );
8563 8561 ins_pipe( ialu_reg );
8564 8562 %}
8565 8563
8566 8564
8567 8565 //----------Multiplication/Division Instructions-------------------------------
8568 8566 // Integer Multiplication Instructions
8569 8567 // Multiply Register
8570 8568 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8571 8569 match(Set dst (MulI dst src));
8572 8570 effect(KILL cr);
8573 8571 
8574 8572 size(3);
8575 8573 ins_cost(300);
8576 8574 format %{ "IMUL $dst,$src" %}
8577 8575 opcode(0xAF, 0x0F);
8578 8576 ins_encode( OpcS, OpcP, RegReg( dst, src) );
8579 8577 ins_pipe( ialu_reg_reg_alu0 );
8580 8578 %}
8581 8579 
8582 8580 // Multiply 32-bit Immediate
// mulI_eReg_imm: three-operand IMUL r32, r/m32, imm (69 /r id).
8583 8581 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
8584 8582 match(Set dst (MulI src imm));
8585 8583 effect(KILL cr);
8586 8584 
8587 8585 ins_cost(300);
8588 8586 format %{ "IMUL $dst,$src,$imm" %}
8589 8587 opcode(0x69); /* 69 /r id */
8590 8588 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8591 8589 ins_pipe( ialu_reg_reg_alu0 );
8592 8590 %}
8593 8591 
// loadConL_low_only: loads a 32-bit-representable long constant into EAX
// only; deliberately overpriced so it is selected only by the
// mulI_imm_high patterns below (which consume just the low word).
8594 8592 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
8595 8593 match(Set dst src);
8596 8594 effect(KILL cr);
8597 8595 
8598 8596 // Note that this is artificially increased to make it more expensive than loadConL
8599 8597 ins_cost(250);
8600 8598 format %{ "MOV EAX,$src\t// low word only" %}
8601 8599 opcode(0xB8);
8602 8600 ins_encode( LdImmL_Lo(dst, src) );
8603 8601 ins_pipe( ialu_reg_fat );
8604 8602 %}
8605 8603 
8606 8604 // Multiply by 32-bit Immediate, taking the shifted high order results
8607 8605 // (special case for shift by 32)
8608 8606 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8609 8607 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8610 8608 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8611 8609 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8612 8610 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8613 8611 effect(USE src1, KILL cr);
8614 8612 
8615 8613 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8616 8614 ins_cost(0*100 + 1*400 - 150);
8617 8615 format %{ "IMUL EDX:EAX,$src1" %}
8618 8616 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8619 8617 ins_pipe( pipe_slow );
8620 8618 %}
8621 8619 
8622 8620 // Multiply by 32-bit Immediate, taking the shifted high order results
// Same as mulI_imm_high, but for shift counts 33..63 (SAR after IMUL).
8623 8621 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8624 8622 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8625 8623 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8626 8624 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8627 8625 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8628 8626 effect(USE src1, KILL cr);
8629 8627 
8630 8628 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8631 8629 ins_cost(1*100 + 1*400 - 150);
8632 8630 format %{ "IMUL EDX:EAX,$src1\n\t"
8633 8631 "SAR EDX,$cnt-32" %}
8634 8632 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8635 8633 ins_pipe( pipe_slow );
8636 8634 %}
8637 8635 
8638 8636 // Multiply Memory 32-bit Immediate
8639 8637 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8640 8638 match(Set dst (MulI (LoadI src) imm));
8641 8639 effect(KILL cr);
8642 8640 
8643 8641 ins_cost(300);
8644 8642 format %{ "IMUL $dst,$src,$imm" %}
8645 8643 opcode(0x69); /* 69 /r id */
8646 8644 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8647 8645 ins_pipe( ialu_reg_mem_alu0 );
8648 8646 %}
8649 8647 
8650 8648 // Multiply Memory
8651 8649 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8652 8650 match(Set dst (MulI dst (LoadI src)));
8653 8651 effect(KILL cr);
8654 8652 
8655 8653 ins_cost(350);
8656 8654 format %{ "IMUL $dst,$src" %}
8657 8655 opcode(0xAF, 0x0F);
8658 8656 ins_encode( OpcS, OpcP, RegMem( dst, src) );
8659 8657 ins_pipe( ialu_reg_mem_alu0 );
8660 8658 %}
8661 8659 
8662 8660 // Multiply Register Int to Long
8663 8661 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8664 8662 // Basic Idea: long = (long)int * (long)int
8665 8663 match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8666 8664 effect(DEF dst, USE src, USE src1, KILL flags);
8667 8665 
8668 8666 ins_cost(300);
8669 8667 format %{ "IMUL $dst,$src1" %}
8670 8668 
8671 8669 ins_encode( long_int_multiply( dst, src1 ) );
8672 8670 ins_pipe( ialu_reg_reg_alu0 );
8673 8671 %}
8674 8672 
// mulIS_eReg: unsigned widening multiply — both operands zero-extended.
8675 8673 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8676 8674 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
8677 8675 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8678 8676 effect(KILL flags);
8679 8677 
8680 8678 ins_cost(300);
8681 8679 format %{ "MUL $dst,$src1" %}
8682 8680 
8683 8681 ins_encode( long_uint_multiply(dst, src1) );
8684 8682 ins_pipe( ialu_reg_reg_alu0 );
8685 8683 %}
8686 8684
8687 8685 // Multiply Register Long
// mulL_eReg: full 64x64->64 multiply on 32-bit x86 via three 32-bit
// multiplies and adds (cross products into the high word).
8688 8686 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8689 8687 match(Set dst (MulL dst src));
8690 8688 effect(KILL cr, TEMP tmp);
8691 8689 ins_cost(4*100+3*400);
8692 8690 // Basic idea: lo(result) = lo(x_lo * y_lo)
8693 8691 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8694 8692 format %{ "MOV $tmp,$src.lo\n\t"
8695 8693 "IMUL $tmp,EDX\n\t"
8696 8694 "MOV EDX,$src.hi\n\t"
8697 8695 "IMUL EDX,EAX\n\t"
8698 8696 "ADD $tmp,EDX\n\t"
8699 8697 "MUL EDX:EAX,$src.lo\n\t"
8700 8698 "ADD EDX,$tmp" %}
8701 8699 ins_encode( long_multiply( dst, src, tmp ) );
8702 8700 ins_pipe( pipe_slow );
8703 8701 %}
8704 8702 
8705 8703 // Multiply Register Long where the left operand's high 32 bits are zero
8706 8704 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8707 8705 predicate(is_operand_hi32_zero(n->in(1)));
8708 8706 match(Set dst (MulL dst src));
8709 8707 effect(KILL cr, TEMP tmp);
8710 8708 ins_cost(2*100+2*400);
8711 8709 // Basic idea: lo(result) = lo(x_lo * y_lo)
8712 8710 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8713 8711 format %{ "MOV $tmp,$src.hi\n\t"
8714 8712 "IMUL $tmp,EAX\n\t"
8715 8713 "MUL EDX:EAX,$src.lo\n\t"
8716 8714 "ADD EDX,$tmp" %}
8717 8715 ins_encode %{
8718 8716 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8719 8717 __ imull($tmp$$Register, rax);
8720 8718 __ mull($src$$Register);
8721 8719 __ addl(rdx, $tmp$$Register);
8722 8720 %}
8723 8721 ins_pipe( pipe_slow );
8724 8722 %}
8725 8723 
8726 8724 // Multiply Register Long where the right operand's high 32 bits are zero
8727 8725 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8728 8726 predicate(is_operand_hi32_zero(n->in(2)));
8729 8727 match(Set dst (MulL dst src));
8730 8728 effect(KILL cr, TEMP tmp);
8731 8729 ins_cost(2*100+2*400);
8732 8730 // Basic idea: lo(result) = lo(x_lo * y_lo)
8733 8731 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8734 8732 format %{ "MOV $tmp,$src.lo\n\t"
8735 8733 "IMUL $tmp,EDX\n\t"
8736 8734 "MUL EDX:EAX,$src.lo\n\t"
8737 8735 "ADD EDX,$tmp" %}
8738 8736 ins_encode %{
8739 8737 __ movl($tmp$$Register, $src$$Register);
8740 8738 __ imull($tmp$$Register, rdx);
8741 8739 __ mull($src$$Register);
8742 8740 __ addl(rdx, $tmp$$Register);
8743 8741 %}
8744 8742 ins_pipe( pipe_slow );
8745 8743 %}
8746 8744 
8747 8745 // Multiply Register Long where the left and the right operands' high 32 bits are zero
8748 8746 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
8749 8747 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8750 8748 match(Set dst (MulL dst src));
8751 8749 effect(KILL cr);
8752 8750 ins_cost(1*400);
8753 8751 // Basic idea: lo(result) = lo(x_lo * y_lo)
8754 8752 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8755 8753 format %{ "MUL EDX:EAX,$src.lo\n\t" %}
8756 8754 ins_encode %{
8757 8755 __ mull($src$$Register);
8758 8756 %}
8759 8757 ins_pipe( pipe_slow );
8760 8758 %}
8761 8759 
8762 8760 // Multiply Register Long by small constant
8763 8761 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
8764 8762 match(Set dst (MulL dst src));
8765 8763 effect(KILL cr, TEMP tmp);
8766 8764 ins_cost(2*100+2*400);
8767 8765 size(12);
8768 8766 // Basic idea: lo(result) = lo(src * EAX)
8769 8767 // hi(result) = hi(src * EAX) + lo(src * EDX)
8770 8768 format %{ "IMUL $tmp,EDX,$src\n\t"
8771 8769 "MOV EDX,$src\n\t"
8772 8770 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
8773 8771 "ADD EDX,$tmp" %}
8774 8772 ins_encode( long_multiply_con( dst, src, tmp ) );
8775 8773 ins_pipe( pipe_slow );
8776 8774 %}
8777 8775
8778 8776 // Integer DIV with Register
// divI_eReg: IDIV with an explicit guard for the min_jint / -1 case,
// which would otherwise raise #DE (overflow) on x86.
8779 8777 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8780 8778 match(Set rax (DivI rax div));
8781 8779 effect(KILL rdx, KILL cr);
8782 8780 size(26);
8783 8781 ins_cost(30*100+10*100);
8784 8782 format %{ "CMP EAX,0x80000000\n\t"
8785 8783 "JNE,s normal\n\t"
8786 8784 "XOR EDX,EDX\n\t"
8787 8785 "CMP ECX,-1\n\t"
8788 8786 "JE,s done\n"
8789 8787 "normal: CDQ\n\t"
8790 8788 "IDIV $div\n\t"
8791 8789 "done:" %}
8792 8790 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8793 8791 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8794 8792 ins_pipe( ialu_reg_reg_alu0 );
8795 8793 %}
8796 8794 
8797 8795 // Divide Register Long
// divL_eReg: general 64-bit division is done out-of-line via a runtime
// call (SharedRuntime::ldiv) with both operands pushed on the stack.
8798 8796 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8799 8797 match(Set dst (DivL src1 src2));
8800 8798 effect( KILL cr, KILL cx, KILL bx );
8801 8799 ins_cost(10000);
8802 8800 format %{ "PUSH $src1.hi\n\t"
8803 8801 "PUSH $src1.lo\n\t"
8804 8802 "PUSH $src2.hi\n\t"
8805 8803 "PUSH $src2.lo\n\t"
8806 8804 "CALL SharedRuntime::ldiv\n\t"
8807 8805 "ADD ESP,16" %}
8808 8806 ins_encode( long_div(src1,src2) );
8809 8807 ins_pipe( pipe_slow );
8810 8808 %}
8811 8809 
8812 8810 // Integer DIVMOD with Register, both quotient and mod results
8813 8811 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8814 8812 match(DivModI rax div);
8815 8813 effect(KILL cr);
8816 8814 size(26);
8817 8815 ins_cost(30*100+10*100);
8818 8816 format %{ "CMP EAX,0x80000000\n\t"
8819 8817 "JNE,s normal\n\t"
8820 8818 "XOR EDX,EDX\n\t"
8821 8819 "CMP ECX,-1\n\t"
8822 8820 "JE,s done\n"
8823 8821 "normal: CDQ\n\t"
8824 8822 "IDIV $div\n\t"
8825 8823 "done:" %}
8826 8824 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8827 8825 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8828 8826 ins_pipe( pipe_slow );
8829 8827 %}
8830 8828 
8831 8829 // Integer MOD with Register
// modI_eReg: remainder comes back in EDX after CDQ/IDIV.
8832 8830 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8833 8831 match(Set rdx (ModI rax div));
8834 8832 effect(KILL rax, KILL cr);
8835 8833 
8836 8834 size(26);
8837 8835 ins_cost(300);
8838 8836 format %{ "CDQ\n\t"
8839 8837 "IDIV $div" %}
8840 8838 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8841 8839 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8842 8840 ins_pipe( ialu_reg_reg_alu0 );
8843 8841 %}
8844 8842 
8845 8843 // Remainder Register Long
// modL_eReg: like divL_eReg, out-of-line via SharedRuntime::lrem.
8846 8844 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8847 8845 match(Set dst (ModL src1 src2));
8848 8846 effect( KILL cr, KILL cx, KILL bx );
8849 8847 ins_cost(10000);
8850 8848 format %{ "PUSH $src1.hi\n\t"
8851 8849 "PUSH $src1.lo\n\t"
8852 8850 "PUSH $src2.hi\n\t"
8853 8851 "PUSH $src2.lo\n\t"
8854 8852 "CALL SharedRuntime::lrem\n\t"
8855 8853 "ADD ESP,16" %}
8856 8854 ins_encode( long_mod(src1,src2) );
8857 8855 ins_pipe( pipe_slow );
8858 8856 %}
8859 8857
8860 8858 // Divide Register Long (no special case since divisor != -1)
// divL_eReg_imm32: long / 32-bit constant, done inline with unsigned
// 32-bit divisions.  Fast path when the high word is already smaller
// than |imm| (quotient fits in 32 bits); otherwise a two-step long
// division, with sign handled by negating before/after.  The exact
// statement order below is significant (flag preservation, register
// pairing) — do not reorder.
8861 8859 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8862 8860 match(Set dst (DivL dst imm));
8863 8861 effect( TEMP tmp, TEMP tmp2, KILL cr );
8864 8862 ins_cost(1000);
8865 8863 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866 8864 "XOR $tmp2,$tmp2\n\t"
8867 8865 "CMP $tmp,EDX\n\t"
8868 8866 "JA,s fast\n\t"
8869 8867 "MOV $tmp2,EAX\n\t"
8870 8868 "MOV EAX,EDX\n\t"
8871 8869 "MOV EDX,0\n\t"
8872 8870 "JLE,s pos\n\t"
8873 8871 "LNEG EAX : $tmp2\n\t"
8874 8872 "DIV $tmp # unsigned division\n\t"
8875 8873 "XCHG EAX,$tmp2\n\t"
8876 8874 "DIV $tmp\n\t"
8877 8875 "LNEG $tmp2 : EAX\n\t"
8878 8876 "JMP,s done\n"
8879 8877 "pos:\n\t"
8880 8878 "DIV $tmp\n\t"
8881 8879 "XCHG EAX,$tmp2\n"
8882 8880 "fast:\n\t"
8883 8881 "DIV $tmp\n"
8884 8882 "done:\n\t"
8885 8883 "MOV EDX,$tmp2\n\t"
8886 8884 "NEG EDX:EAX # if $imm < 0" %}
8887 8885 ins_encode %{
8888 8886 int con = (int)$imm$$constant;
8889 8887 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8890 8888 int pcon = (con > 0) ? con : -con;
8891 8889 Label Lfast, Lpos, Ldone;
8892 8890 
8893 8891 __ movl($tmp$$Register, pcon);
8894 8892 __ xorl($tmp2$$Register,$tmp2$$Register);
8895 8893 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8896 8894 __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8897 8895 
8898 8896 __ movl($tmp2$$Register, $dst$$Register); // save
8899 8897 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8900 8898 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8901 8899 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8902 8900 
8903 8901 // Negative dividend.
8904 8902 // convert value to positive to use unsigned division
8905 8903 __ lneg($dst$$Register, $tmp2$$Register);
8906 8904 __ divl($tmp$$Register);
8907 8905 __ xchgl($dst$$Register, $tmp2$$Register);
8908 8906 __ divl($tmp$$Register);
8909 8907 // revert result back to negative
8910 8908 __ lneg($tmp2$$Register, $dst$$Register);
8911 8909 __ jmpb(Ldone);
8912 8910 
8913 8911 __ bind(Lpos);
8914 8912 __ divl($tmp$$Register); // Use unsigned division
8915 8913 __ xchgl($dst$$Register, $tmp2$$Register);
8916 8914 // Fallthrow for final divide, tmp2 has 32 bit hi result
8917 8915 
8918 8916 __ bind(Lfast);
8919 8917 // fast path: src is positive
8920 8918 __ divl($tmp$$Register); // Use unsigned division
8921 8919 
8922 8920 __ bind(Ldone);
8923 8921 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8924 8922 if (con < 0) {
8925 8923 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8926 8924 }
8927 8925 %}
8928 8926 ins_pipe( pipe_slow );
8929 8927 %}
8930 8928
8931 8929 // Remainder Register Long (remainder fit into 32 bits)
8932 8930 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ // signed long remainder by a 32-bit constant; dst is pinned to EDX:EAX (DIV operands)
8933 8931 match(Set dst (ModL dst imm));
8934 8932 effect( TEMP tmp, TEMP tmp2, KILL cr );
8935 8933 ins_cost(1000);
8936 8934 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8937 8935 "CMP $tmp,EDX\n\t"
8938 8936 "JA,s fast\n\t"
8939 8937 "MOV $tmp2,EAX\n\t"
8940 8938 "MOV EAX,EDX\n\t"
8941 8939 "MOV EDX,0\n\t"
8942 8940 "JLE,s pos\n\t"
8943 8941 "LNEG EAX : $tmp2\n\t"
8944 8942 "DIV $tmp # unsigned division\n\t"
8945 8943 "MOV EAX,$tmp2\n\t"
8946 8944 "DIV $tmp\n\t"
8947 8945 "NEG EDX\n\t"
8948 8946 "JMP,s done\n"
8949 8947 "pos:\n\t"
8950 8948 "DIV $tmp\n\t"
8951 8949 "MOV EAX,$tmp2\n"
8952 8950 "fast:\n\t"
8953 8951 "DIV $tmp\n"
8954 8952 "done:\n\t"
8955 8953 "MOV EAX,EDX\n\t"
8956 8954 "SAR EDX,31\n\t" %}
8957 8955 ins_encode %{
8958 8956 int con = (int)$imm$$constant; // divisor; immL32 guarantees it fits in 32 bits
8959 8957 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); // -con below would overflow for min_jint; 0/-1 handled by other rules
8960 8958 int pcon = (con > 0) ? con : -con; // |con|: unsigned DIV needs a non-negative divisor; sign is reapplied at the end
8961 8959 Label Lfast, Lpos, Ldone;
8962 8960
8963 8961 __ movl($tmp$$Register, pcon);
8964 8962 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); // divisor vs. high word of dividend
8965 8963 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8966 8964
8967 8965 __ movl($tmp2$$Register, $dst$$Register); // save
8968 8966 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8969 8967 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8970 8968 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8971 8969
8972 8970 // Negative dividend.
8973 8971 // convert value to positive to use unsigned division
8974 8972 __ lneg($dst$$Register, $tmp2$$Register);
8975 8973 __ divl($tmp$$Register); // first step of the 64/32 schoolbook division: divide the high word
8976 8974 __ movl($dst$$Register, $tmp2$$Register);
8977 8975 __ divl($tmp$$Register); // second step: remainder of the full 64-bit division is left in EDX
8978 8976 // revert remainder back to negative
8979 8977 __ negl(HIGH_FROM_LOW($dst$$Register));
8980 8978 __ jmpb(Ldone);
8981 8979
8982 8980 __ bind(Lpos);
8983 8981 __ divl($tmp$$Register); // positive dividend: same two-step unsigned division, no sign fixup
8984 8982 __ movl($dst$$Register, $tmp2$$Register);
8985 8983
8986 8984 __ bind(Lfast);
8987 8985 // fast path: src is positive
8988 8986 __ divl($tmp$$Register);
8989 8987
8990 8988 __ bind(Ldone);
8991 8989 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // move the 32-bit remainder (EDX) into the low word (EAX)
8992 8990 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8993 8991
8994 8992 %}
8995 8993 ins_pipe( pipe_slow );
8996 8994 %}
8997 8995
8998 8996 // Integer Shift Instructions
8999 8997 // Shift Left by one
9000 8998 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9001 8999 match(Set dst (LShiftI dst shift));
9002 9000 effect(KILL cr);
9003 9001
9004 9002 size(2);
9005 9003 format %{ "SHL $dst,$shift" %}
9006 9004 opcode(0xD1, 0x4); /* D1 /4 */
9007 9005 ins_encode( OpcP, RegOpc( dst ) );
9008 9006 ins_pipe( ialu_reg );
9009 9007 %}
9010 9008
9011 9009 // Shift Left by 8-bit immediate
9012 9010 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9013 9011 match(Set dst (LShiftI dst shift));
9014 9012 effect(KILL cr);
9015 9013
9016 9014 size(3);
9017 9015 format %{ "SHL $dst,$shift" %}
9018 9016 opcode(0xC1, 0x4); /* C1 /4 ib */
9019 9017 ins_encode( RegOpcImm( dst, shift) );
9020 9018 ins_pipe( ialu_reg );
9021 9019 %}
9022 9020
9023 9021 // Shift Left by variable
9024 9022 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9025 9023 match(Set dst (LShiftI dst shift));
9026 9024 effect(KILL cr);
9027 9025
9028 9026 size(2);
9029 9027 format %{ "SHL $dst,$shift" %}
9030 9028 opcode(0xD3, 0x4); /* D3 /4 */
9031 9029 ins_encode( OpcP, RegOpc( dst ) );
9032 9030 ins_pipe( ialu_reg_reg );
9033 9031 %}
9034 9032
9035 9033 // Arithmetic shift right by one
9036 9034 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9037 9035 match(Set dst (RShiftI dst shift));
9038 9036 effect(KILL cr);
9039 9037
9040 9038 size(2);
9041 9039 format %{ "SAR $dst,$shift" %}
9042 9040 opcode(0xD1, 0x7); /* D1 /7 */
9043 9041 ins_encode( OpcP, RegOpc( dst ) );
9044 9042 ins_pipe( ialu_reg );
9045 9043 %}
9046 9044
9047 9045 // Arithmetic shift right by one
9048 9046 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
9049 9047 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9050 9048 effect(KILL cr);
9051 9049 format %{ "SAR $dst,$shift" %}
9052 9050 opcode(0xD1, 0x7); /* D1 /7 */
9053 9051 ins_encode( OpcP, RMopc_Mem(secondary,dst) );
9054 9052 ins_pipe( ialu_mem_imm );
9055 9053 %}
9056 9054
9057 9055 // Arithmetic Shift Right by 8-bit immediate
9058 9056 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ // SAR r32, imm8: arithmetic right shift of a register
9059 9057 match(Set dst (RShiftI dst shift));
9060 9058 effect(KILL cr); // shift instructions clobber the condition codes
9061 9059
9062 9060 size(3);
9063 9061 format %{ "SAR $dst,$shift" %}
9064 9062 opcode(0xC1, 0x7); /* C1 /7 ib */
9065 9063 ins_encode( RegOpcImm( dst, shift ) );
9066 9064 ins_pipe( ialu_reg ); // was ialu_mem_imm: this is a reg-imm op; matches salI_eReg_imm/shrI_eReg_imm, while sarI_mem_imm keeps the mem pipe
9067 9065 %}
9068 9066
9069 9067 // Arithmetic Shift Right by 8-bit immediate
9070 9068 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
9071 9069 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9072 9070 effect(KILL cr);
9073 9071
9074 9072 format %{ "SAR $dst,$shift" %}
9075 9073 opcode(0xC1, 0x7); /* C1 /7 ib */
9076 9074 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
9077 9075 ins_pipe( ialu_mem_imm );
9078 9076 %}
9079 9077
9080 9078 // Arithmetic Shift Right by variable
9081 9079 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9082 9080 match(Set dst (RShiftI dst shift));
9083 9081 effect(KILL cr);
9084 9082
9085 9083 size(2);
9086 9084 format %{ "SAR $dst,$shift" %}
9087 9085 opcode(0xD3, 0x7); /* D3 /7 */
9088 9086 ins_encode( OpcP, RegOpc( dst ) );
9089 9087 ins_pipe( ialu_reg_reg );
9090 9088 %}
9091 9089
9092 9090 // Logical shift right by one
9093 9091 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9094 9092 match(Set dst (URShiftI dst shift));
9095 9093 effect(KILL cr);
9096 9094
9097 9095 size(2);
9098 9096 format %{ "SHR $dst,$shift" %}
9099 9097 opcode(0xD1, 0x5); /* D1 /5 */
9100 9098 ins_encode( OpcP, RegOpc( dst ) );
9101 9099 ins_pipe( ialu_reg );
9102 9100 %}
9103 9101
9104 9102 // Logical Shift Right by 8-bit immediate
9105 9103 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9106 9104 match(Set dst (URShiftI dst shift));
9107 9105 effect(KILL cr);
9108 9106
9109 9107 size(3);
9110 9108 format %{ "SHR $dst,$shift" %}
9111 9109 opcode(0xC1, 0x5); /* C1 /5 ib */
9112 9110 ins_encode( RegOpcImm( dst, shift) );
9113 9111 ins_pipe( ialu_reg );
9114 9112 %}
9115 9113
9116 9114
9117 9115 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
9118 9116 // This idiom is used by the compiler for the i2b bytecode.
9119 9117 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
9120 9118 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // (x << 24) >> 24: sign-extend the low byte
9121 9119
9122 9120 size(3);
9123 9121 format %{ "MOVSX $dst,$src :8" %}
9124 9122 ins_encode %{
9125 9123 __ movsbl($dst$$Register, $src$$Register); // one MOVSX replaces the two-shift idiom; NOTE(review): xRegI presumably restricts src to byte-addressable registers — confirm operand class definition
9126 9124 %}
9127 9125 ins_pipe(ialu_reg_reg);
9128 9126 %}
9129 9127
9130 9128 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
9131 9129 // This idiom is used by the compiler for the i2s bytecode.
9132 9130 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
9133 9131 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // (x << 16) >> 16: sign-extend the low 16 bits
9134 9132
9135 9133 size(3);
9136 9134 format %{ "MOVSX $dst,$src :16" %}
9137 9135 ins_encode %{
9138 9136 __ movswl($dst$$Register, $src$$Register); // one MOVSX r32,r16 replaces the two-shift idiom
9139 9137 %}
9140 9138 ins_pipe(ialu_reg_reg);
9141 9139 %}
9142 9140
9143 9141
9144 9142 // Logical Shift Right by variable
9145 9143 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9146 9144 match(Set dst (URShiftI dst shift));
9147 9145 effect(KILL cr);
9148 9146
9149 9147 size(2);
9150 9148 format %{ "SHR $dst,$shift" %}
9151 9149 opcode(0xD3, 0x5); /* D3 /5 */
9152 9150 ins_encode( OpcP, RegOpc( dst ) );
9153 9151 ins_pipe( ialu_reg_reg );
9154 9152 %}
9155 9153
9156 9154
9157 9155 //----------Logical Instructions-----------------------------------------------
9158 9156 //----------Integer Logical Instructions---------------------------------------
9159 9157 // And Instructions
9160 9158 // And Register with Register
9161 9159 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9162 9160 match(Set dst (AndI dst src));
9163 9161 effect(KILL cr);
9164 9162
9165 9163 size(2);
9166 9164 format %{ "AND $dst,$src" %}
9167 9165 opcode(0x23);
9168 9166 ins_encode( OpcP, RegReg( dst, src) );
9169 9167 ins_pipe( ialu_reg_reg );
9170 9168 %}
9171 9169
9172 9170 // And Register with Immediate
9173 9171 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9174 9172 match(Set dst (AndI dst src));
9175 9173 effect(KILL cr);
9176 9174
9177 9175 format %{ "AND $dst,$src" %}
9178 9176 opcode(0x81,0x04); /* Opcode 81 /4 */
9179 9177 // ins_encode( RegImm( dst, src) );
9180 9178 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9181 9179 ins_pipe( ialu_reg );
9182 9180 %}
9183 9181
9184 9182 // And Register with Memory
9185 9183 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9186 9184 match(Set dst (AndI dst (LoadI src)));
9187 9185 effect(KILL cr);
9188 9186
9189 9187 ins_cost(125);
9190 9188 format %{ "AND $dst,$src" %}
9191 9189 opcode(0x23);
9192 9190 ins_encode( OpcP, RegMem( dst, src) );
9193 9191 ins_pipe( ialu_reg_mem );
9194 9192 %}
9195 9193
9196 9194 // And Memory with Register
9197 9195 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9198 9196 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9199 9197 effect(KILL cr);
9200 9198
9201 9199 ins_cost(150);
9202 9200 format %{ "AND $dst,$src" %}
9203 9201 opcode(0x21); /* Opcode 21 /r */
9204 9202 ins_encode( OpcP, RegMem( src, dst ) );
9205 9203 ins_pipe( ialu_mem_reg );
9206 9204 %}
9207 9205
9208 9206 // And Memory with Immediate
9209 9207 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9210 9208 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9211 9209 effect(KILL cr);
9212 9210
9213 9211 ins_cost(125);
9214 9212 format %{ "AND $dst,$src" %}
9215 9213 opcode(0x81, 0x4); /* Opcode 81 /4 id */
9216 9214 // ins_encode( MemImm( dst, src) );
9217 9215 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9218 9216 ins_pipe( ialu_mem_imm );
9219 9217 %}
9220 9218
9221 9219 // Or Instructions
9222 9220 // Or Register with Register
9223 9221 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9224 9222 match(Set dst (OrI dst src));
9225 9223 effect(KILL cr);
9226 9224
9227 9225 size(2);
9228 9226 format %{ "OR $dst,$src" %}
9229 9227 opcode(0x0B);
9230 9228 ins_encode( OpcP, RegReg( dst, src) );
9231 9229 ins_pipe( ialu_reg_reg );
9232 9230 %}
9233 9231
9234 9232 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
9235 9233 match(Set dst (OrI dst (CastP2X src)));
9236 9234 effect(KILL cr);
9237 9235
9238 9236 size(2);
9239 9237 format %{ "OR $dst,$src" %}
9240 9238 opcode(0x0B);
9241 9239 ins_encode( OpcP, RegReg( dst, src) );
9242 9240 ins_pipe( ialu_reg_reg );
9243 9241 %}
9244 9242
9245 9243
9246 9244 // Or Register with Immediate
9247 9245 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9248 9246 match(Set dst (OrI dst src));
9249 9247 effect(KILL cr);
9250 9248
9251 9249 format %{ "OR $dst,$src" %}
9252 9250 opcode(0x81,0x01); /* Opcode 81 /1 id */
9253 9251 // ins_encode( RegImm( dst, src) );
9254 9252 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9255 9253 ins_pipe( ialu_reg );
9256 9254 %}
9257 9255
9258 9256 // Or Register with Memory
9259 9257 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9260 9258 match(Set dst (OrI dst (LoadI src)));
9261 9259 effect(KILL cr);
9262 9260
9263 9261 ins_cost(125);
9264 9262 format %{ "OR $dst,$src" %}
9265 9263 opcode(0x0B);
9266 9264 ins_encode( OpcP, RegMem( dst, src) );
9267 9265 ins_pipe( ialu_reg_mem );
9268 9266 %}
9269 9267
9270 9268 // Or Memory with Register
9271 9269 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9272 9270 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9273 9271 effect(KILL cr);
9274 9272
9275 9273 ins_cost(150);
9276 9274 format %{ "OR $dst,$src" %}
9277 9275 opcode(0x09); /* Opcode 09 /r */
9278 9276 ins_encode( OpcP, RegMem( src, dst ) );
9279 9277 ins_pipe( ialu_mem_reg );
9280 9278 %}
9281 9279
9282 9280 // Or Memory with Immediate
9283 9281 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9284 9282 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9285 9283 effect(KILL cr);
9286 9284
9287 9285 ins_cost(125);
9288 9286 format %{ "OR $dst,$src" %}
9289 9287 opcode(0x81,0x1); /* Opcode 81 /1 id */
9290 9288 // ins_encode( MemImm( dst, src) );
9291 9289 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9292 9290 ins_pipe( ialu_mem_imm );
9293 9291 %}
9294 9292
9295 9293 // ROL/ROR
9296 9294 // ROL expand
9297 9295 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9298 9296 effect(USE_DEF dst, USE shift, KILL cr);
9299 9297
9300 9298 format %{ "ROL $dst, $shift" %}
9301 9299 opcode(0xD1, 0x0); /* Opcode D1 /0 */
9302 9300 ins_encode( OpcP, RegOpc( dst ));
9303 9301 ins_pipe( ialu_reg );
9304 9302 %}
9305 9303
9306 9304 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9307 9305 effect(USE_DEF dst, USE shift, KILL cr);
9308 9306
9309 9307 format %{ "ROL $dst, $shift" %}
9310 9308 opcode(0xC1, 0x0); /*Opcode /C1 /0 */
9311 9309 ins_encode( RegOpcImm(dst, shift) );
9312 9310 ins_pipe(ialu_reg);
9313 9311 %}
9314 9312
9315 9313 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
9316 9314 effect(USE_DEF dst, USE shift, KILL cr);
9317 9315
9318 9316 format %{ "ROL $dst, $shift" %}
9319 9317 opcode(0xD3, 0x0); /* Opcode D3 /0 */
9320 9318 ins_encode(OpcP, RegOpc(dst));
9321 9319 ins_pipe( ialu_reg_reg );
9322 9320 %}
9323 9321 // end of ROL expand
9324 9322
9325 9323 // ROL 32bit by one once
9326 9324 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
9327 9325 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9328 9326
9329 9327 expand %{
9330 9328 rolI_eReg_imm1(dst, lshift, cr);
9331 9329 %}
9332 9330 %}
9333 9331
9334 9332 // ROL 32bit var by imm8 once
9335 9333 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
9336 9334 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // lshift + rshift must be 0 mod 32: only then is (x<<l)|(x>>>r) a rotate
9337 9335 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9338 9336
9339 9337 expand %{
9340 9338 rolI_eReg_imm8(dst, lshift, cr); // rotate left by the left-shift amount
9341 9339 %}
9342 9340 %}
9343 9341
9344 9342 // ROL 32bit var by var once
9345 9343 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9346 9344 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9347 9345
9348 9346 expand %{
9349 9347 rolI_eReg_CL(dst, shift, cr);
9350 9348 %}
9351 9349 %}
9352 9350
9353 9351 // ROL 32bit var by var once
9354 9352 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9355 9353 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9356 9354
9357 9355 expand %{
9358 9356 rolI_eReg_CL(dst, shift, cr);
9359 9357 %}
9360 9358 %}
9361 9359
9362 9360 // ROR expand
9363 9361 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9364 9362 effect(USE_DEF dst, USE shift, KILL cr);
9365 9363
9366 9364 format %{ "ROR $dst, $shift" %}
9367 9365 opcode(0xD1,0x1); /* Opcode D1 /1 */
9368 9366 ins_encode( OpcP, RegOpc( dst ) );
9369 9367 ins_pipe( ialu_reg );
9370 9368 %}
9371 9369
9372 9370 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9373 9371 effect (USE_DEF dst, USE shift, KILL cr);
9374 9372
9375 9373 format %{ "ROR $dst, $shift" %}
9376 9374 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
9377 9375 ins_encode( RegOpcImm(dst, shift) );
9378 9376 ins_pipe( ialu_reg );
9379 9377 %}
9380 9378
9381 9379 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
9382 9380 effect(USE_DEF dst, USE shift, KILL cr);
9383 9381
9384 9382 format %{ "ROR $dst, $shift" %}
9385 9383 opcode(0xD3, 0x1); /* Opcode D3 /1 */
9386 9384 ins_encode(OpcP, RegOpc(dst));
9387 9385 ins_pipe( ialu_reg_reg );
9388 9386 %}
9389 9387 // end of ROR expand
9390 9388
9391 9389 // ROR right once
9392 9390 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
9393 9391 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9394 9392
9395 9393 expand %{
9396 9394 rorI_eReg_imm1(dst, rshift, cr);
9397 9395 %}
9398 9396 %}
9399 9397
9400 9398 // ROR 32bit by immI8 once
9401 9399 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9402 9400 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // rshift + lshift must be 0 mod 32: only then is (x>>>r)|(x<<l) a rotate
9403 9401 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9404 9402
9405 9403 expand %{
9406 9404 rorI_eReg_imm8(dst, rshift, cr); // rotate right by the right-shift amount
9407 9405 %}
9408 9406 %}
9409 9407
9410 9408 // ROR 32bit var by var once
9411 9409 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9412 9410 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9413 9411
9414 9412 expand %{
9415 9413 rorI_eReg_CL(dst, shift, cr);
9416 9414 %}
9417 9415 %}
9418 9416
9419 9417 // ROR 32bit var by var once
9420 9418 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9421 9419 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9422 9420
9423 9421 expand %{
9424 9422 rorI_eReg_CL(dst, shift, cr);
9425 9423 %}
9426 9424 %}
9427 9425
9428 9426 // Xor Instructions
9429 9427 // Xor Register with Register
9430 9428 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9431 9429 match(Set dst (XorI dst src));
9432 9430 effect(KILL cr);
9433 9431
9434 9432 size(2);
9435 9433 format %{ "XOR $dst,$src" %}
9436 9434 opcode(0x33);
9437 9435 ins_encode( OpcP, RegReg( dst, src) );
9438 9436 ins_pipe( ialu_reg_reg );
9439 9437 %}
9440 9438
9441 9439 // Xor Register with Immediate -1
9442 9440 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9443 9441 match(Set dst (XorI dst imm)); // x ^ -1 == ~x
9444 9442
9445 9443 size(2);
9446 9444 format %{ "NOT $dst" %}
9447 9445 ins_encode %{
9448 9446 __ notl($dst$$Register); // NOT leaves the flags untouched, hence no eFlagsReg/KILL cr in this rule
9449 9447 %}
9450 9448 ins_pipe( ialu_reg );
9451 9449 %}
9452 9450
9453 9451 // Xor Register with Immediate
9454 9452 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9455 9453 match(Set dst (XorI dst src));
9456 9454 effect(KILL cr);
9457 9455
9458 9456 format %{ "XOR $dst,$src" %}
9459 9457 opcode(0x81,0x06); /* Opcode 81 /6 id */
9460 9458 // ins_encode( RegImm( dst, src) );
9461 9459 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9462 9460 ins_pipe( ialu_reg );
9463 9461 %}
9464 9462
9465 9463 // Xor Register with Memory
9466 9464 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9467 9465 match(Set dst (XorI dst (LoadI src)));
9468 9466 effect(KILL cr);
9469 9467
9470 9468 ins_cost(125);
9471 9469 format %{ "XOR $dst,$src" %}
9472 9470 opcode(0x33);
9473 9471 ins_encode( OpcP, RegMem(dst, src) );
9474 9472 ins_pipe( ialu_reg_mem );
9475 9473 %}
9476 9474
9477 9475 // Xor Memory with Register
9478 9476 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9479 9477 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9480 9478 effect(KILL cr);
9481 9479
9482 9480 ins_cost(150);
9483 9481 format %{ "XOR $dst,$src" %}
9484 9482 opcode(0x31); /* Opcode 31 /r */
9485 9483 ins_encode( OpcP, RegMem( src, dst ) );
9486 9484 ins_pipe( ialu_mem_reg );
9487 9485 %}
9488 9486
9489 9487 // Xor Memory with Immediate
9490 9488 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9491 9489 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9492 9490 effect(KILL cr);
9493 9491
9494 9492 ins_cost(125);
9495 9493 format %{ "XOR $dst,$src" %}
9496 9494 opcode(0x81,0x6); /* Opcode 81 /6 id */
9497 9495 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9498 9496 ins_pipe( ialu_mem_imm );
9499 9497 %}
9500 9498
9501 9499 //----------Convert Int to Boolean---------------------------------------------
9502 9500
9503 9501 instruct movI_nocopy(eRegI dst, eRegI src) %{
9504 9502 effect( DEF dst, USE src );
9505 9503 format %{ "MOV $dst,$src" %}
9506 9504 ins_encode( enc_Copy( dst, src) );
9507 9505 ins_pipe( ialu_reg_reg );
9508 9506 %}
9509 9507
9510 9508 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9511 9509 effect( USE_DEF dst, USE src, KILL cr );
9512 9510
9513 9511 size(4);
9514 9512 format %{ "NEG $dst\n\t"
9515 9513 "ADC $dst,$src" %}
9516 9514 ins_encode( neg_reg(dst),
9517 9515 OpcRegReg(0x13,dst,src) );
9518 9516 ins_pipe( ialu_reg_reg_long );
9519 9517 %}
9520 9518
9521 9519 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9522 9520 match(Set dst (Conv2B src));
9523 9521
9524 9522 expand %{
9525 9523 movI_nocopy(dst,src);
9526 9524 ci2b(dst,src,cr);
9527 9525 %}
9528 9526 %}
9529 9527
9530 9528 instruct movP_nocopy(eRegI dst, eRegP src) %{
9531 9529 effect( DEF dst, USE src );
9532 9530 format %{ "MOV $dst,$src" %}
9533 9531 ins_encode( enc_Copy( dst, src) );
9534 9532 ins_pipe( ialu_reg_reg );
9535 9533 %}
9536 9534
9537 9535 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9538 9536 effect( USE_DEF dst, USE src, KILL cr );
9539 9537 format %{ "NEG $dst\n\t"
9540 9538 "ADC $dst,$src" %}
9541 9539 ins_encode( neg_reg(dst),
9542 9540 OpcRegReg(0x13,dst,src) );
9543 9541 ins_pipe( ialu_reg_reg_long );
9544 9542 %}
9545 9543
9546 9544 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9547 9545 match(Set dst (Conv2B src));
9548 9546
9549 9547 expand %{
9550 9548 movP_nocopy(dst,src);
9551 9549 cp2b(dst,src,cr);
9552 9550 %}
9553 9551 %}
9554 9552
9555 9553 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9556 9554 match(Set dst (CmpLTMask p q)); // dst = (p < q) ? -1 : 0 — branch-free all-ones mask
9557 9555 effect( KILL cr );
9558 9556 ins_cost(400);
9559 9557
9560 9558 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9561 9559 format %{ "XOR $dst,$dst\n\t"
9562 9560 "CMP $p,$q\n\t"
9563 9561 "SETlt $dst\n\t"
9564 9562 "NEG $dst" %}
9565 9563 ins_encode( OpcRegReg(0x33,dst,dst),
9566 9564 OpcRegReg(0x3B,p,q),
9567 9565 setLT_reg(dst), neg_reg(dst) ); // SETlt yields 0/1; NEG turns that into 0/-1
9568 9566 ins_pipe( pipe_slow );
9569 9567 %}
9570 9568
9571 9569 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9572 9570 match(Set dst (CmpLTMask dst zero)); // compare against 0: the sign bit alone decides the mask
9573 9571 effect( DEF dst, KILL cr );
9574 9572 ins_cost(100);
9575 9573
9576 9574 format %{ "SAR $dst,31" %}
9577 9575 opcode(0xC1, 0x7); /* C1 /7 ib */
9578 9576 ins_encode( RegOpcImm( dst, 0x1F ) ); // arithmetic shift by 31 smears the sign bit across the whole word (0 or -1)
9579 9577 ins_pipe( ialu_reg );
9580 9578 %}
9581 9579
9582 9580
9583 9581 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9584 9582 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); // p = (p - q) + ((p < q) ? y : 0), computed branch-free
9585 9583 effect( KILL tmp, KILL cr );
9586 9584 ins_cost(400);
9587 9585 // annoyingly, $tmp has no edges so you can't ask for it in
9588 9586 // any format or encoding
9589 9587 format %{ "SUB $p,$q\n\t"
9590 9588 "SBB ECX,ECX\n\t"
9591 9589 "AND ECX,$y\n\t"
9592 9590 "ADD $p,ECX" %}
9593 9591 ins_encode( enc_cmpLTP(p,q,y,tmp) ); // SBB ECX,ECX turns the borrow from SUB into a 0/-1 mask in ECX
9594 9592 ins_pipe( pipe_cmplt );
9595 9593 %}
9596 9594
9597 9595 /* If I enable this, I encourage spilling in the inner loop of compress.
9598 9596 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9599 9597 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9600 9598 effect( USE_KILL tmp, KILL cr );
9601 9599 ins_cost(400);
9602 9600
9603 9601 format %{ "SUB $p,$q\n\t"
9604 9602 "SBB ECX,ECX\n\t"
9605 9603 "AND ECX,$y\n\t"
9606 9604 "ADD $p,ECX" %}
9607 9605 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9608 9606 %}
9609 9607 */
9610 9608
9611 9609 //----------Long Instructions------------------------------------------------
9612 9610 // Add Long Register with Register
9613 9611 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9614 9612 match(Set dst (AddL dst src));
9615 9613 effect(KILL cr);
9616 9614 ins_cost(200);
9617 9615 format %{ "ADD $dst.lo,$src.lo\n\t"
9618 9616 "ADC $dst.hi,$src.hi" %}
9619 9617 opcode(0x03, 0x13);
9620 9618 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9621 9619 ins_pipe( ialu_reg_reg_long );
9622 9620 %}
9623 9621
9624 9622 // Add Long Register with Immediate
9625 9623 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9626 9624 match(Set dst (AddL dst src));
9627 9625 effect(KILL cr);
9628 9626 format %{ "ADD $dst.lo,$src.lo\n\t"
9629 9627 "ADC $dst.hi,$src.hi" %}
9630 9628 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9631 9629 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9632 9630 ins_pipe( ialu_reg_long );
9633 9631 %}
9634 9632
9635 9633 // Add Long Register with Memory
9636 9634 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9637 9635 match(Set dst (AddL dst (LoadL mem)));
9638 9636 effect(KILL cr);
9639 9637 ins_cost(125);
9640 9638 format %{ "ADD $dst.lo,$mem\n\t"
9641 9639 "ADC $dst.hi,$mem+4" %}
9642 9640 opcode(0x03, 0x13);
9643 9641 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9644 9642 ins_pipe( ialu_reg_long_mem );
9645 9643 %}
9646 9644
9647 9645 // Subtract Long Register with Register.
9648 9646 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9649 9647 match(Set dst (SubL dst src));
9650 9648 effect(KILL cr);
9651 9649 ins_cost(200);
9652 9650 format %{ "SUB $dst.lo,$src.lo\n\t"
9653 9651 "SBB $dst.hi,$src.hi" %}
9654 9652 opcode(0x2B, 0x1B);
9655 9653 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9656 9654 ins_pipe( ialu_reg_reg_long );
9657 9655 %}
9658 9656
9659 9657 // Subtract Long Register with Immediate
9660 9658 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9661 9659 match(Set dst (SubL dst src));
9662 9660 effect(KILL cr);
9663 9661 format %{ "SUB $dst.lo,$src.lo\n\t"
9664 9662 "SBB $dst.hi,$src.hi" %}
9665 9663 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9666 9664 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9667 9665 ins_pipe( ialu_reg_long );
9668 9666 %}
9669 9667
9670 9668 // Subtract Long Register with Memory
9671 9669 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9672 9670 match(Set dst (SubL dst (LoadL mem)));
9673 9671 effect(KILL cr);
9674 9672 ins_cost(125);
9675 9673 format %{ "SUB $dst.lo,$mem\n\t"
9676 9674 "SBB $dst.hi,$mem+4" %}
9677 9675 opcode(0x2B, 0x1B);
9678 9676 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9679 9677 ins_pipe( ialu_reg_long_mem );
9680 9678 %}
9681 9679
9682 9680 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9683 9681 match(Set dst (SubL zero dst));
9684 9682 effect(KILL cr);
9685 9683 ins_cost(300);
9686 9684 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9687 9685 ins_encode( neg_long(dst) );
9688 9686 ins_pipe( ialu_reg_reg_long );
9689 9687 %}
9690 9688
9691 9689 // And Long Register with Register
9692 9690 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9693 9691 match(Set dst (AndL dst src));
9694 9692 effect(KILL cr);
9695 9693 format %{ "AND $dst.lo,$src.lo\n\t"
9696 9694 "AND $dst.hi,$src.hi" %}
9697 9695 opcode(0x23,0x23);
9698 9696 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9699 9697 ins_pipe( ialu_reg_reg_long );
9700 9698 %}
9701 9699
9702 9700 // And Long Register with Immediate
9703 9701 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9704 9702 match(Set dst (AndL dst src));
9705 9703 effect(KILL cr);
9706 9704 format %{ "AND $dst.lo,$src.lo\n\t"
9707 9705 "AND $dst.hi,$src.hi" %}
9708 9706 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9709 9707 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9710 9708 ins_pipe( ialu_reg_long );
9711 9709 %}
9712 9710
9713 9711 // And Long Register with Memory
9714 9712 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9715 9713 match(Set dst (AndL dst (LoadL mem)));
9716 9714 effect(KILL cr);
9717 9715 ins_cost(125);
9718 9716 format %{ "AND $dst.lo,$mem\n\t"
9719 9717 "AND $dst.hi,$mem+4" %}
9720 9718 opcode(0x23, 0x23);
9721 9719 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9722 9720 ins_pipe( ialu_reg_long_mem );
9723 9721 %}
9724 9722
9725 9723 // Or Long Register with Register
9726 9724 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9727 9725 match(Set dst (OrL dst src));
9728 9726 effect(KILL cr);
9729 9727 format %{ "OR $dst.lo,$src.lo\n\t"
9730 9728 "OR $dst.hi,$src.hi" %}
9731 9729 opcode(0x0B,0x0B);
9732 9730 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9733 9731 ins_pipe( ialu_reg_reg_long );
9734 9732 %}
9735 9733
9736 9734 // Or Long Register with Immediate
9737 9735 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9738 9736 match(Set dst (OrL dst src));
9739 9737 effect(KILL cr);
9740 9738 format %{ "OR $dst.lo,$src.lo\n\t"
9741 9739 "OR $dst.hi,$src.hi" %}
9742 9740 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9743 9741 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9744 9742 ins_pipe( ialu_reg_long );
9745 9743 %}
9746 9744
9747 9745 // Or Long Register with Memory
9748 9746 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9749 9747 match(Set dst (OrL dst (LoadL mem)));
9750 9748 effect(KILL cr);
9751 9749 ins_cost(125);
9752 9750 format %{ "OR $dst.lo,$mem\n\t"
9753 9751 "OR $dst.hi,$mem+4" %}
9754 9752 opcode(0x0B,0x0B);
9755 9753 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9756 9754 ins_pipe( ialu_reg_long_mem );
9757 9755 %}
9758 9756
9759 9757 // Xor Long Register with Register
9760 9758 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9761 9759 match(Set dst (XorL dst src));
9762 9760 effect(KILL cr);
9763 9761 format %{ "XOR $dst.lo,$src.lo\n\t"
9764 9762 "XOR $dst.hi,$src.hi" %}
9765 9763 opcode(0x33,0x33);
9766 9764 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9767 9765 ins_pipe( ialu_reg_reg_long );
9768 9766 %}
9769 9767
9770 9768 // Xor Long Register with Immediate -1
9771 9769 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9772 9770 match(Set dst (XorL dst imm)); // x ^ -1L == ~x, done per 32-bit half
9773 9771 format %{ "NOT $dst.lo\n\t"
9774 9772 "NOT $dst.hi" %}
9775 9773 ins_encode %{
9776 9774 __ notl($dst$$Register); // low 32 bits
9777 9775 __ notl(HIGH_FROM_LOW($dst$$Register)); // high 32 bits; NOT leaves the flags alone, so no KILL cr
9778 9776 %}
9779 9777 ins_pipe( ialu_reg_long );
9780 9778 %}
9781 9779
9782 9780 // Xor Long Register with Immediate
9783 9781 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9784 9782 match(Set dst (XorL dst src));
9785 9783 effect(KILL cr);
9786 9784 format %{ "XOR $dst.lo,$src.lo\n\t"
9787 9785 "XOR $dst.hi,$src.hi" %}
9788 9786 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9789 9787 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9790 9788 ins_pipe( ialu_reg_long );
9791 9789 %}
9792 9790
9793 9791 // Xor Long Register with Memory
9794 9792 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9795 9793 match(Set dst (XorL dst (LoadL mem)));
9796 9794 effect(KILL cr);
9797 9795 ins_cost(125);
9798 9796 format %{ "XOR $dst.lo,$mem\n\t"
9799 9797 "XOR $dst.hi,$mem+4" %}
9800 9798 opcode(0x33,0x33);
9801 9799 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9802 9800 ins_pipe( ialu_reg_long_mem );
9803 9801 %}
9804 9802
9805 9803 // Shift Left Long by 1
9806 9804 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9807 9805 predicate(UseNewLongLShift); // only when the VM flag enables the ADD/ADC long-shift variants
9808 9806 match(Set dst (LShiftL dst cnt));
9809 9807 effect(KILL cr);
9810 9808 ins_cost(100);
9811 9809 format %{ "ADD $dst.lo,$dst.lo\n\t"
9812 9810 "ADC $dst.hi,$dst.hi" %}
9813 9811 ins_encode %{
9814 9812 __ addl($dst$$Register,$dst$$Register); // lo += lo; carry out of bit 31 ...
9815 9813 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // ... is added into hi: a 64-bit shift-left-by-1
9816 9814 %}
9817 9815 ins_pipe( ialu_reg_long );
9818 9816 %}
9819 9817
9820 9818 // Shift Left Long by 2
9821 9819 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9822 9820 predicate(UseNewLongLShift);
9823 9821 match(Set dst (LShiftL dst cnt));
9824 9822 effect(KILL cr);
9825 9823 ins_cost(100);
9826 9824 format %{ "ADD $dst.lo,$dst.lo\n\t"
9827 9825 "ADC $dst.hi,$dst.hi\n\t"
9828 9826 "ADD $dst.lo,$dst.lo\n\t"
9829 9827 "ADC $dst.hi,$dst.hi" %}
9830 9828 ins_encode %{
9831 9829 __ addl($dst$$Register,$dst$$Register);
9832 9830 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9833 9831 __ addl($dst$$Register,$dst$$Register);
9834 9832 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9835 9833 %}
9836 9834 ins_pipe( ialu_reg_long );
9837 9835 %}
9838 9836
9839 9837 // Shift Left Long by 3
9840 9838 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9841 9839 predicate(UseNewLongLShift);
9842 9840 match(Set dst (LShiftL dst cnt));
9843 9841 effect(KILL cr);
9844 9842 ins_cost(100);
9845 9843 format %{ "ADD $dst.lo,$dst.lo\n\t"
9846 9844 "ADC $dst.hi,$dst.hi\n\t"
9847 9845 "ADD $dst.lo,$dst.lo\n\t"
9848 9846 "ADC $dst.hi,$dst.hi\n\t"
9849 9847 "ADD $dst.lo,$dst.lo\n\t"
9850 9848 "ADC $dst.hi,$dst.hi" %}
9851 9849 ins_encode %{
9852 9850 __ addl($dst$$Register,$dst$$Register);
9853 9851 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9854 9852 __ addl($dst$$Register,$dst$$Register);
9855 9853 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9856 9854 __ addl($dst$$Register,$dst$$Register);
9857 9855 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9858 9856 %}
9859 9857 ins_pipe( ialu_reg_long );
9860 9858 %}
9861 9859
9862 9860 // Shift Left Long by 1-31
9863 9861 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9864 9862 match(Set dst (LShiftL dst cnt));
9865 9863 effect(KILL cr);
9866 9864 ins_cost(200);
9867 9865 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9868 9866 "SHL $dst.lo,$cnt" %}
9869 9867 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9870 9868 ins_encode( move_long_small_shift(dst,cnt) );
9871 9869 ins_pipe( ialu_reg_long );
9872 9870 %}
9873 9871
9874 9872 // Shift Left Long by 32-63
9875 9873 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9876 9874 match(Set dst (LShiftL dst cnt));
9877 9875 effect(KILL cr);
9878 9876 ins_cost(300);
9879 9877 format %{ "MOV $dst.hi,$dst.lo\n"
9880 9878 "\tSHL $dst.hi,$cnt-32\n"
9881 9879 "\tXOR $dst.lo,$dst.lo" %}
9882 9880 opcode(0xC1, 0x4); /* C1 /4 ib */
9883 9881 ins_encode( move_long_big_shift_clr(dst,cnt) );
9884 9882 ins_pipe( ialu_reg_long );
9885 9883 %}
9886 9884
9887 9885 // Shift Left Long by variable
9888 9886 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9889 9887 match(Set dst (LShiftL dst shift));
9890 9888 effect(KILL cr);
9891 9889 ins_cost(500+200);
9892 9890 size(17);
9893 9891 format %{ "TEST $shift,32\n\t"
9894 9892 "JEQ,s small\n\t"
9895 9893 "MOV $dst.hi,$dst.lo\n\t"
9896 9894 "XOR $dst.lo,$dst.lo\n"
9897 9895 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
9898 9896 "SHL $dst.lo,$shift" %}
9899 9897 ins_encode( shift_left_long( dst, shift ) );
9900 9898 ins_pipe( pipe_slow );
9901 9899 %}
9902 9900
9903 9901 // Shift Right Long by 1-31
9904 9902 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9905 9903 match(Set dst (URShiftL dst cnt));
9906 9904 effect(KILL cr);
9907 9905 ins_cost(200);
9908 9906 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9909 9907 "SHR $dst.hi,$cnt" %}
9910 9908 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9911 9909 ins_encode( move_long_small_shift(dst,cnt) );
9912 9910 ins_pipe( ialu_reg_long );
9913 9911 %}
9914 9912
9915 9913 // Shift Right Long by 32-63
9916 9914 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9917 9915 match(Set dst (URShiftL dst cnt));
9918 9916 effect(KILL cr);
9919 9917 ins_cost(300);
9920 9918 format %{ "MOV $dst.lo,$dst.hi\n"
9921 9919 "\tSHR $dst.lo,$cnt-32\n"
9922 9920 "\tXOR $dst.hi,$dst.hi" %}
9923 9921 opcode(0xC1, 0x5); /* C1 /5 ib */
9924 9922 ins_encode( move_long_big_shift_clr(dst,cnt) );
9925 9923 ins_pipe( ialu_reg_long );
9926 9924 %}
9927 9925
9928 9926 // Shift Right Long by variable
9929 9927 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9930 9928 match(Set dst (URShiftL dst shift));
9931 9929 effect(KILL cr);
9932 9930 ins_cost(600);
9933 9931 size(17);
9934 9932 format %{ "TEST $shift,32\n\t"
9935 9933 "JEQ,s small\n\t"
9936 9934 "MOV $dst.lo,$dst.hi\n\t"
9937 9935 "XOR $dst.hi,$dst.hi\n"
9938 9936 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9939 9937 "SHR $dst.hi,$shift" %}
9940 9938 ins_encode( shift_right_long( dst, shift ) );
9941 9939 ins_pipe( pipe_slow );
9942 9940 %}
9943 9941
9944 9942 // Shift Right Long by 1-31
9945 9943 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9946 9944 match(Set dst (RShiftL dst cnt));
9947 9945 effect(KILL cr);
9948 9946 ins_cost(200);
9949 9947 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9950 9948 "SAR $dst.hi,$cnt" %}
9951 9949 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9952 9950 ins_encode( move_long_small_shift(dst,cnt) );
9953 9951 ins_pipe( ialu_reg_long );
9954 9952 %}
9955 9953
9956 9954 // Shift Right Long by 32-63
9957 9955 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9958 9956 match(Set dst (RShiftL dst cnt));
9959 9957 effect(KILL cr);
9960 9958 ins_cost(300);
9961 9959 format %{ "MOV $dst.lo,$dst.hi\n"
9962 9960 "\tSAR $dst.lo,$cnt-32\n"
9963 9961 "\tSAR $dst.hi,31" %}
9964 9962 opcode(0xC1, 0x7); /* C1 /7 ib */
9965 9963 ins_encode( move_long_big_shift_sign(dst,cnt) );
9966 9964 ins_pipe( ialu_reg_long );
9967 9965 %}
9968 9966
9969 9967 // Shift Right arithmetic Long by variable
9970 9968 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9971 9969 match(Set dst (RShiftL dst shift));
9972 9970 effect(KILL cr);
9973 9971 ins_cost(600);
9974 9972 size(18);
9975 9973 format %{ "TEST $shift,32\n\t"
9976 9974 "JEQ,s small\n\t"
9977 9975 "MOV $dst.lo,$dst.hi\n\t"
9978 9976 "SAR $dst.hi,31\n"
9979 9977 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9980 9978 "SAR $dst.hi,$shift" %}
9981 9979 ins_encode( shift_right_arith_long( dst, shift ) );
9982 9980 ins_pipe( pipe_slow );
9983 9981 %}
9984 9982
9985 9983
9986 9984 //----------Double Instructions------------------------------------------------
9987 9985 // Double Math
9988 9986
9989 9987 // Compare & branch
9990 9988
9991 9989 // P6 version of float compare, sets condition codes in EFLAGS
9992 9990 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9993 9991 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9994 9992 match(Set cr (CmpD src1 src2));
9995 9993 effect(KILL rax);
9996 9994 ins_cost(150);
9997 9995 format %{ "FLD $src1\n\t"
9998 9996 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9999 9997 "JNP exit\n\t"
10000 9998 "MOV ah,1 // saw a NaN, set CF\n\t"
10001 9999 "SAHF\n"
10002 10000 "exit:\tNOP // avoid branch to branch" %}
10003 10001 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10004 10002 ins_encode( Push_Reg_D(src1),
10005 10003 OpcP, RegOpc(src2),
10006 10004 cmpF_P6_fixup );
10007 10005 ins_pipe( pipe_slow );
10008 10006 %}
10009 10007
10010 10008 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
10011 10009 predicate(VM_Version::supports_cmov() && UseSSE <=1);
10012 10010 match(Set cr (CmpD src1 src2));
10013 10011 ins_cost(150);
10014 10012 format %{ "FLD $src1\n\t"
10015 10013 "FUCOMIP ST,$src2 // P6 instruction" %}
10016 10014 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10017 10015 ins_encode( Push_Reg_D(src1),
10018 10016 OpcP, RegOpc(src2));
10019 10017 ins_pipe( pipe_slow );
10020 10018 %}
10021 10019
10022 10020 // Compare & branch
10023 10021 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
10024 10022 predicate(UseSSE<=1);
10025 10023 match(Set cr (CmpD src1 src2));
10026 10024 effect(KILL rax);
10027 10025 ins_cost(200);
10028 10026 format %{ "FLD $src1\n\t"
10029 10027 "FCOMp $src2\n\t"
10030 10028 "FNSTSW AX\n\t"
10031 10029 "TEST AX,0x400\n\t"
10032 10030 "JZ,s flags\n\t"
10033 10031 "MOV AH,1\t# unordered treat as LT\n"
10034 10032 "flags:\tSAHF" %}
10035 10033 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10036 10034 ins_encode( Push_Reg_D(src1),
10037 10035 OpcP, RegOpc(src2),
10038 10036 fpu_flags);
10039 10037 ins_pipe( pipe_slow );
10040 10038 %}
10041 10039
10042 10040 // Compare vs zero into -1,0,1
10043 10041 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
10044 10042 predicate(UseSSE<=1);
10045 10043 match(Set dst (CmpD3 src1 zero));
10046 10044 effect(KILL cr, KILL rax);
10047 10045 ins_cost(280);
10048 10046 format %{ "FTSTD $dst,$src1" %}
10049 10047 opcode(0xE4, 0xD9);
10050 10048 ins_encode( Push_Reg_D(src1),
10051 10049 OpcS, OpcP, PopFPU,
10052 10050 CmpF_Result(dst));
10053 10051 ins_pipe( pipe_slow );
10054 10052 %}
10055 10053
10056 10054 // Compare into -1,0,1
10057 10055 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
10058 10056 predicate(UseSSE<=1);
10059 10057 match(Set dst (CmpD3 src1 src2));
10060 10058 effect(KILL cr, KILL rax);
10061 10059 ins_cost(300);
10062 10060 format %{ "FCMPD $dst,$src1,$src2" %}
10063 10061 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10064 10062 ins_encode( Push_Reg_D(src1),
10065 10063 OpcP, RegOpc(src2),
10066 10064 CmpF_Result(dst));
10067 10065 ins_pipe( pipe_slow );
10068 10066 %}
10069 10067
10070 10068 // float compare and set condition codes in EFLAGS by XMM regs
10071 10069 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
10072 10070 predicate(UseSSE>=2);
10073 10071 match(Set cr (CmpD dst src));
10074 10072 effect(KILL rax);
10075 10073 ins_cost(125);
10076 10074 format %{ "COMISD $dst,$src\n"
10077 10075 "\tJNP exit\n"
10078 10076 "\tMOV ah,1 // saw a NaN, set CF\n"
10079 10077 "\tSAHF\n"
10080 10078 "exit:\tNOP // avoid branch to branch" %}
10081 10079 opcode(0x66, 0x0F, 0x2F);
10082 10080 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
10083 10081 ins_pipe( pipe_slow );
10084 10082 %}
10085 10083
10086 10084 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
10087 10085 predicate(UseSSE>=2);
10088 10086 match(Set cr (CmpD dst src));
10089 10087 ins_cost(100);
10090 10088 format %{ "COMISD $dst,$src" %}
10091 10089 opcode(0x66, 0x0F, 0x2F);
10092 10090 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
10093 10091 ins_pipe( pipe_slow );
10094 10092 %}
10095 10093
10096 10094 // float compare and set condition codes in EFLAGS by XMM regs
10097 10095 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
10098 10096 predicate(UseSSE>=2);
10099 10097 match(Set cr (CmpD dst (LoadD src)));
10100 10098 effect(KILL rax);
10101 10099 ins_cost(145);
10102 10100 format %{ "COMISD $dst,$src\n"
10103 10101 "\tJNP exit\n"
10104 10102 "\tMOV ah,1 // saw a NaN, set CF\n"
10105 10103 "\tSAHF\n"
10106 10104 "exit:\tNOP // avoid branch to branch" %}
10107 10105 opcode(0x66, 0x0F, 0x2F);
10108 10106 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
10109 10107 ins_pipe( pipe_slow );
10110 10108 %}
10111 10109
10112 10110 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
10113 10111 predicate(UseSSE>=2);
10114 10112 match(Set cr (CmpD dst (LoadD src)));
10115 10113 ins_cost(100);
10116 10114 format %{ "COMISD $dst,$src" %}
10117 10115 opcode(0x66, 0x0F, 0x2F);
10118 10116 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
10119 10117 ins_pipe( pipe_slow );
10120 10118 %}
10121 10119
10122 10120 // Compare into -1,0,1 in XMM
10123 10121 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
10124 10122 predicate(UseSSE>=2);
10125 10123 match(Set dst (CmpD3 src1 src2));
10126 10124 effect(KILL cr);
10127 10125 ins_cost(255);
10128 10126 format %{ "XOR $dst,$dst\n"
10129 10127 "\tCOMISD $src1,$src2\n"
10130 10128 "\tJP,s nan\n"
10131 10129 "\tJEQ,s exit\n"
10132 10130 "\tJA,s inc\n"
10133 10131 "nan:\tDEC $dst\n"
10134 10132 "\tJMP,s exit\n"
10135 10133 "inc:\tINC $dst\n"
10136 10134 "exit:"
10137 10135 %}
10138 10136 opcode(0x66, 0x0F, 0x2F);
10139 10137 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
10140 10138 CmpX_Result(dst));
10141 10139 ins_pipe( pipe_slow );
10142 10140 %}
10143 10141
10144 10142 // Compare into -1,0,1 in XMM and memory
10145 10143 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
10146 10144 predicate(UseSSE>=2);
10147 10145 match(Set dst (CmpD3 src1 (LoadD mem)));
10148 10146 effect(KILL cr);
10149 10147 ins_cost(275);
10150 10148 format %{ "COMISD $src1,$mem\n"
10151 10149 "\tMOV $dst,0\t\t# do not blow flags\n"
10152 10150 "\tJP,s nan\n"
10153 10151 "\tJEQ,s exit\n"
10154 10152 "\tJA,s inc\n"
10155 10153 "nan:\tDEC $dst\n"
10156 10154 "\tJMP,s exit\n"
10157 10155 "inc:\tINC $dst\n"
10158 10156 "exit:"
10159 10157 %}
10160 10158 opcode(0x66, 0x0F, 0x2F);
10161 10159 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
10162 10160 LdImmI(dst,0x0), CmpX_Result(dst));
10163 10161 ins_pipe( pipe_slow );
10164 10162 %}
10165 10163
10166 10164
10167 10165 instruct subD_reg(regD dst, regD src) %{
10168 10166 predicate (UseSSE <=1);
10169 10167 match(Set dst (SubD dst src));
10170 10168
10171 10169 format %{ "FLD $src\n\t"
10172 10170 "DSUBp $dst,ST" %}
10173 10171 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10174 10172 ins_cost(150);
10175 10173 ins_encode( Push_Reg_D(src),
10176 10174 OpcP, RegOpc(dst) );
10177 10175 ins_pipe( fpu_reg_reg );
10178 10176 %}
10179 10177
10180 10178 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10181 10179 predicate (UseSSE <=1);
10182 10180 match(Set dst (RoundDouble (SubD src1 src2)));
10183 10181 ins_cost(250);
10184 10182
10185 10183 format %{ "FLD $src2\n\t"
10186 10184 "DSUB ST,$src1\n\t"
10187 10185 "FSTP_D $dst\t# D-round" %}
10188 10186 opcode(0xD8, 0x5);
10189 10187 ins_encode( Push_Reg_D(src2),
10190 10188 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10191 10189 ins_pipe( fpu_mem_reg_reg );
10192 10190 %}
10193 10191
10194 10192
10195 10193 instruct subD_reg_mem(regD dst, memory src) %{
10196 10194 predicate (UseSSE <=1);
10197 10195 match(Set dst (SubD dst (LoadD src)));
10198 10196 ins_cost(150);
10199 10197
10200 10198 format %{ "FLD $src\n\t"
10201 10199 "DSUBp $dst,ST" %}
10202 10200 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10203 10201 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10204 10202 OpcP, RegOpc(dst) );
10205 10203 ins_pipe( fpu_reg_mem );
10206 10204 %}
10207 10205
10208 10206 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
10209 10207 predicate (UseSSE<=1);
10210 10208 match(Set dst (AbsD src));
10211 10209 ins_cost(100);
10212 10210 format %{ "FABS" %}
10213 10211 opcode(0xE1, 0xD9);
10214 10212 ins_encode( OpcS, OpcP );
10215 10213 ins_pipe( fpu_reg_reg );
10216 10214 %}
10217 10215
10218 10216 instruct absXD_reg( regXD dst ) %{
10219 10217 predicate(UseSSE>=2);
10220 10218 match(Set dst (AbsD dst));
10221 10219 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
10222 10220 ins_encode( AbsXD_encoding(dst));
10223 10221 ins_pipe( pipe_slow );
10224 10222 %}
10225 10223
10226 10224 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
10227 10225 predicate(UseSSE<=1);
10228 10226 match(Set dst (NegD src));
10229 10227 ins_cost(100);
10230 10228 format %{ "FCHS" %}
10231 10229 opcode(0xE0, 0xD9);
10232 10230 ins_encode( OpcS, OpcP );
10233 10231 ins_pipe( fpu_reg_reg );
10234 10232 %}
10235 10233
10236 10234 instruct negXD_reg( regXD dst ) %{
10237 10235 predicate(UseSSE>=2);
10238 10236 match(Set dst (NegD dst));
10239 10237 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
10240 10238 ins_encode %{
10241 10239 __ xorpd($dst$$XMMRegister,
10242 10240 ExternalAddress((address)double_signflip_pool));
10243 10241 %}
10244 10242 ins_pipe( pipe_slow );
10245 10243 %}
10246 10244
10247 10245 instruct addD_reg(regD dst, regD src) %{
10248 10246 predicate(UseSSE<=1);
10249 10247 match(Set dst (AddD dst src));
10250 10248 format %{ "FLD $src\n\t"
10251 10249 "DADD $dst,ST" %}
10252 10250 size(4);
10253 10251 ins_cost(150);
10254 10252 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10255 10253 ins_encode( Push_Reg_D(src),
10256 10254 OpcP, RegOpc(dst) );
10257 10255 ins_pipe( fpu_reg_reg );
10258 10256 %}
10259 10257
10260 10258
10261 10259 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10262 10260 predicate(UseSSE<=1);
10263 10261 match(Set dst (RoundDouble (AddD src1 src2)));
10264 10262 ins_cost(250);
10265 10263
10266 10264 format %{ "FLD $src2\n\t"
10267 10265 "DADD ST,$src1\n\t"
10268 10266 "FSTP_D $dst\t# D-round" %}
10269 10267 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
10270 10268 ins_encode( Push_Reg_D(src2),
10271 10269 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10272 10270 ins_pipe( fpu_mem_reg_reg );
10273 10271 %}
10274 10272
10275 10273
10276 10274 instruct addD_reg_mem(regD dst, memory src) %{
10277 10275 predicate(UseSSE<=1);
10278 10276 match(Set dst (AddD dst (LoadD src)));
10279 10277 ins_cost(150);
10280 10278
10281 10279 format %{ "FLD $src\n\t"
10282 10280 "DADDp $dst,ST" %}
10283 10281 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10284 10282 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10285 10283 OpcP, RegOpc(dst) );
10286 10284 ins_pipe( fpu_reg_mem );
10287 10285 %}
10288 10286
10289 10287 // add-to-memory
10290 10288 instruct addD_mem_reg(memory dst, regD src) %{
10291 10289 predicate(UseSSE<=1);
10292 10290 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
10293 10291 ins_cost(150);
10294 10292
10295 10293 format %{ "FLD_D $dst\n\t"
↓ open down ↓ |
3021 lines elided |
↑ open up ↑ |
10296 10294 "DADD ST,$src\n\t"
10297 10295 "FST_D $dst" %}
10298 10296 opcode(0xDD, 0x0);
10299 10297 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
10300 10298 Opcode(0xD8), RegOpc(src),
10301 10299 set_instruction_start,
10302 10300 Opcode(0xDD), RMopc_Mem(0x03,dst) );
10303 10301 ins_pipe( fpu_reg_mem );
10304 10302 %}
10305 10303
10306 -instruct addD_reg_imm1(regD dst, immD1 src) %{
10304 +instruct addD_reg_imm1(regD dst, immD1 con) %{
10307 10305 predicate(UseSSE<=1);
10308 - match(Set dst (AddD dst src));
10306 + match(Set dst (AddD dst con));
10309 10307 ins_cost(125);
10310 10308 format %{ "FLD1\n\t"
10311 10309 "DADDp $dst,ST" %}
10312 - opcode(0xDE, 0x00);
10313 - ins_encode( LdImmD(src),
10314 - OpcP, RegOpc(dst) );
10315 - ins_pipe( fpu_reg );
10310 + ins_encode %{
10311 + __ fld1();
10312 + __ faddp($dst$$reg);
10313 + %}
10314 + ins_pipe(fpu_reg);
10316 10315 %}
10317 10316
10318 -instruct addD_reg_imm(regD dst, immD src) %{
10317 +instruct addD_reg_imm(regD dst, immD con) %{
10319 10318 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10320 - match(Set dst (AddD dst src));
10319 + match(Set dst (AddD dst con));
10321 10320 ins_cost(200);
10322 - format %{ "FLD_D [$src]\n\t"
10321 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10323 10322 "DADDp $dst,ST" %}
10324 - opcode(0xDE, 0x00); /* DE /0 */
10325 - ins_encode( LdImmD(src),
10326 - OpcP, RegOpc(dst));
10327 - ins_pipe( fpu_reg_mem );
10323 + ins_encode %{
10324 + __ fld_d($constantaddress($con));
10325 + __ faddp($dst$$reg);
10326 + %}
10327 + ins_pipe(fpu_reg_mem);
10328 10328 %}
10329 10329
10330 10330 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
10331 10331 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
10332 10332 match(Set dst (RoundDouble (AddD src con)));
10333 10333 ins_cost(200);
10334 - format %{ "FLD_D [$con]\n\t"
10334 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10335 10335 "DADD ST,$src\n\t"
10336 10336 "FSTP_D $dst\t# D-round" %}
10337 - opcode(0xD8, 0x00); /* D8 /0 */
10338 - ins_encode( LdImmD(con),
10339 - OpcP, RegOpc(src), Pop_Mem_D(dst));
10340 - ins_pipe( fpu_mem_reg_con );
10337 + ins_encode %{
10338 + __ fld_d($constantaddress($con));
10339 + __ fadd($src$$reg);
10340 + __ fstp_d(Address(rsp, $dst$$disp));
10341 + %}
10342 + ins_pipe(fpu_mem_reg_con);
10341 10343 %}
10342 10344
10343 10345 // Add two double precision floating point values in xmm
10344 10346 instruct addXD_reg(regXD dst, regXD src) %{
10345 10347 predicate(UseSSE>=2);
10346 10348 match(Set dst (AddD dst src));
10347 10349 format %{ "ADDSD $dst,$src" %}
10348 10350 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10349 10351 ins_pipe( pipe_slow );
10350 10352 %}
10351 10353
10352 10354 instruct addXD_imm(regXD dst, immXD con) %{
10353 10355 predicate(UseSSE>=2);
10354 10356 match(Set dst (AddD dst con));
10355 - format %{ "ADDSD $dst,[$con]" %}
10356 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
10357 - ins_pipe( pipe_slow );
10357 + format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10358 + ins_encode %{
10359 + __ addsd($dst$$XMMRegister, $constantaddress($con));
10360 + %}
10361 + ins_pipe(pipe_slow);
10358 10362 %}
10359 10363
10360 10364 instruct addXD_mem(regXD dst, memory mem) %{
10361 10365 predicate(UseSSE>=2);
10362 10366 match(Set dst (AddD dst (LoadD mem)));
10363 10367 format %{ "ADDSD $dst,$mem" %}
10364 10368 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10365 10369 ins_pipe( pipe_slow );
10366 10370 %}
10367 10371
10368 10372 // Sub two double precision floating point values in xmm
10369 10373 instruct subXD_reg(regXD dst, regXD src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
10370 10374 predicate(UseSSE>=2);
10371 10375 match(Set dst (SubD dst src));
10372 10376 format %{ "SUBSD $dst,$src" %}
10373 10377 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10374 10378 ins_pipe( pipe_slow );
10375 10379 %}
10376 10380
10377 10381 instruct subXD_imm(regXD dst, immXD con) %{
10378 10382 predicate(UseSSE>=2);
10379 10383 match(Set dst (SubD dst con));
10380 - format %{ "SUBSD $dst,[$con]" %}
10381 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
10382 - ins_pipe( pipe_slow );
10384 + format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10385 + ins_encode %{
10386 + __ subsd($dst$$XMMRegister, $constantaddress($con));
10387 + %}
10388 + ins_pipe(pipe_slow);
10383 10389 %}
10384 10390
10385 10391 instruct subXD_mem(regXD dst, memory mem) %{
10386 10392 predicate(UseSSE>=2);
10387 10393 match(Set dst (SubD dst (LoadD mem)));
10388 10394 format %{ "SUBSD $dst,$mem" %}
10389 10395 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10390 10396 ins_pipe( pipe_slow );
10391 10397 %}
10392 10398
10393 10399 // Mul two double precision floating point values in xmm
10394 10400 instruct mulXD_reg(regXD dst, regXD src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
10395 10401 predicate(UseSSE>=2);
10396 10402 match(Set dst (MulD dst src));
10397 10403 format %{ "MULSD $dst,$src" %}
10398 10404 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10399 10405 ins_pipe( pipe_slow );
10400 10406 %}
10401 10407
10402 10408 instruct mulXD_imm(regXD dst, immXD con) %{
10403 10409 predicate(UseSSE>=2);
10404 10410 match(Set dst (MulD dst con));
10405 - format %{ "MULSD $dst,[$con]" %}
10406 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
10407 - ins_pipe( pipe_slow );
10411 + format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10412 + ins_encode %{
10413 + __ mulsd($dst$$XMMRegister, $constantaddress($con));
10414 + %}
10415 + ins_pipe(pipe_slow);
10408 10416 %}
10409 10417
10410 10418 instruct mulXD_mem(regXD dst, memory mem) %{
10411 10419 predicate(UseSSE>=2);
10412 10420 match(Set dst (MulD dst (LoadD mem)));
10413 10421 format %{ "MULSD $dst,$mem" %}
10414 10422 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10415 10423 ins_pipe( pipe_slow );
10416 10424 %}
10417 10425
10418 10426 // Div two double precision floating point values in xmm
10419 10427 instruct divXD_reg(regXD dst, regXD src) %{
10420 10428 predicate(UseSSE>=2);
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
10421 10429 match(Set dst (DivD dst src));
10422 10430 format %{ "DIVSD $dst,$src" %}
10423 10431 opcode(0xF2, 0x0F, 0x5E);
10424 10432 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10425 10433 ins_pipe( pipe_slow );
10426 10434 %}
10427 10435
10428 10436 instruct divXD_imm(regXD dst, immXD con) %{
10429 10437 predicate(UseSSE>=2);
10430 10438 match(Set dst (DivD dst con));
10431 - format %{ "DIVSD $dst,[$con]" %}
10432 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
10433 - ins_pipe( pipe_slow );
10439 + format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10440 + ins_encode %{
10441 + __ divsd($dst$$XMMRegister, $constantaddress($con));
10442 + %}
10443 + ins_pipe(pipe_slow);
10434 10444 %}
10435 10445
10436 10446 instruct divXD_mem(regXD dst, memory mem) %{
10437 10447 predicate(UseSSE>=2);
10438 10448 match(Set dst (DivD dst (LoadD mem)));
10439 10449 format %{ "DIVSD $dst,$mem" %}
10440 10450 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10441 10451 ins_pipe( pipe_slow );
10442 10452 %}
10443 10453
10444 10454
10445 10455 instruct mulD_reg(regD dst, regD src) %{
10446 10456 predicate(UseSSE<=1);
10447 10457 match(Set dst (MulD dst src));
10448 10458 format %{ "FLD $src\n\t"
10449 10459 "DMULp $dst,ST" %}
10450 10460 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10451 10461 ins_cost(150);
10452 10462 ins_encode( Push_Reg_D(src),
10453 10463 OpcP, RegOpc(dst) );
10454 10464 ins_pipe( fpu_reg_reg );
10455 10465 %}
10456 10466
10457 10467 // Strict FP instruction biases argument before multiply then
10458 10468 // biases result to avoid double rounding of subnormals.
10459 10469 //
10460 10470 // scale arg1 by multiplying arg1 by 2^(-15360)
10461 10471 // load arg2
10462 10472 // multiply scaled arg1 by arg2
10463 10473 // rescale product by 2^(15360)
10464 10474 //
10465 10475 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
10466 10476 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10467 10477 match(Set dst (MulD dst src));
10468 10478 ins_cost(1); // Select this instruction for all strict FP double multiplies
10469 10479
10470 10480 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10471 10481 "DMULp $dst,ST\n\t"
10472 10482 "FLD $src\n\t"
10473 10483 "DMULp $dst,ST\n\t"
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
10474 10484 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10475 10485 "DMULp $dst,ST\n\t" %}
10476 10486 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10477 10487 ins_encode( strictfp_bias1(dst),
10478 10488 Push_Reg_D(src),
10479 10489 OpcP, RegOpc(dst),
10480 10490 strictfp_bias2(dst) );
10481 10491 ins_pipe( fpu_reg_reg );
10482 10492 %}
10483 10493
10484 -instruct mulD_reg_imm(regD dst, immD src) %{
10494 +instruct mulD_reg_imm(regD dst, immD con) %{
10485 10495 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10486 - match(Set dst (MulD dst src));
10496 + match(Set dst (MulD dst con));
10487 10497 ins_cost(200);
10488 - format %{ "FLD_D [$src]\n\t"
10498 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10489 10499 "DMULp $dst,ST" %}
10490 - opcode(0xDE, 0x1); /* DE /1 */
10491 - ins_encode( LdImmD(src),
10492 - OpcP, RegOpc(dst) );
10493 - ins_pipe( fpu_reg_mem );
10500 + ins_encode %{
10501 + __ fld_d($constantaddress($con));
10502 + __ fmulp($dst$$reg);
10503 + %}
10504 + ins_pipe(fpu_reg_mem);
10494 10505 %}
10495 10506
10496 10507
10497 10508 instruct mulD_reg_mem(regD dst, memory src) %{
10498 10509 predicate( UseSSE<=1 );
10499 10510 match(Set dst (MulD dst (LoadD src)));
10500 10511 ins_cost(200);
10501 10512 format %{ "FLD_D $src\n\t"
10502 10513 "DMULp $dst,ST" %}
10503 10514 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
10504 10515 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10505 10516 OpcP, RegOpc(dst) );
10506 10517 ins_pipe( fpu_reg_mem );
10507 10518 %}
10508 10519
10509 10520 //
10510 10521 // Cisc-alternate to reg-reg multiply
10511 10522 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
10512 10523 predicate( UseSSE<=1 );
10513 10524 match(Set dst (MulD src (LoadD mem)));
10514 10525 ins_cost(250);
10515 10526 format %{ "FLD_D $mem\n\t"
10516 10527 "DMUL ST,$src\n\t"
10517 10528 "FSTP_D $dst" %}
10518 10529 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10519 10530 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10520 10531 OpcReg_F(src),
10521 10532 Pop_Reg_D(dst) );
10522 10533 ins_pipe( fpu_reg_reg_mem );
10523 10534 %}
10524 10535
10525 10536
10526 10537 // MACRO3 -- addD a mulD
10527 10538 // This instruction is a '2-address' instruction in that the result goes
10528 10539 // back to src2. This eliminates a move from the macro; possibly the
10529 10540 // register allocator will have to add it back (and maybe not).
10530 10541 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
10531 10542 predicate( UseSSE<=1 );
10532 10543 match(Set src2 (AddD (MulD src0 src1) src2));
10533 10544 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10534 10545 "DMUL ST,$src1\n\t"
10535 10546 "DADDp $src2,ST" %}
10536 10547 ins_cost(250);
10537 10548 opcode(0xDD); /* LoadD DD /0 */
10538 10549 ins_encode( Push_Reg_F(src0),
10539 10550 FMul_ST_reg(src1),
10540 10551 FAddP_reg_ST(src2) );
10541 10552 ins_pipe( fpu_reg_reg_reg );
10542 10553 %}
10543 10554
10544 10555
10545 10556 // MACRO3 -- subD a mulD
10546 10557 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
10547 10558 predicate( UseSSE<=1 );
10548 10559 match(Set src2 (SubD (MulD src0 src1) src2));
10549 10560 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10550 10561 "DMUL ST,$src1\n\t"
10551 10562 "DSUBRp $src2,ST" %}
10552 10563 ins_cost(250);
10553 10564 ins_encode( Push_Reg_F(src0),
10554 10565 FMul_ST_reg(src1),
10555 10566 Opcode(0xDE), Opc_plus(0xE0,src2));
10556 10567 ins_pipe( fpu_reg_reg_reg );
10557 10568 %}
10558 10569
10559 10570
10560 10571 instruct divD_reg(regD dst, regD src) %{
10561 10572 predicate( UseSSE<=1 );
10562 10573 match(Set dst (DivD dst src));
10563 10574
10564 10575 format %{ "FLD $src\n\t"
10565 10576 "FDIVp $dst,ST" %}
10566 10577 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10567 10578 ins_cost(150);
10568 10579 ins_encode( Push_Reg_D(src),
10569 10580 OpcP, RegOpc(dst) );
10570 10581 ins_pipe( fpu_reg_reg );
10571 10582 %}
10572 10583
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-mode x87 double divide: dst must be FPR1, src must NOT be FPR1.
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate: the strict-mode test already includes UseSSE<=1.
  // (The original carried a second, redundant predicate(UseSSE<=1) line.)
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  // NOTE(review): cost '01' (== 1) makes this form essentially free so it
  // wins selection over divD_reg in strict methods — confirm the literal
  // was not intended to be a larger cost.
  ins_cost(01);

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10600 10611
// Divide then round the result to a stack slot (non-strict methods only):
// dst = round(src1 / src2), spilling forces double rounding precision.
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10613 10624
10614 10625
// x87 double remainder: dst = dst MOD src via FPREM loop in emitModD().
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
             emitModD(),
             Push_Result_Mod_D(src),
             Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}
10628 10639
// SSE2 double remainder: round-trips src0/src1 through the x87 stack
// (no SSE remainder instruction exists) using the FPREM partial-remainder
// loop, then stores the result back to an XMM register.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10653 10664
// x87 sine: operand already in FPR1 (ST0); emits raw FSIN (D9 FE).
instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}
10663 10674
// SSE2 sine: bounce the XMM value through the x87 stack for FSIN.
instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10674 10685
// x87 cosine: operand already in FPR1 (ST0); emits raw FCOS (D9 FF).
instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}
10684 10695
// SSE2 cosine: bounce the XMM value through the x87 stack for FCOS.
instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10695 10706
// x87 tangent: FPTAN pushes tan(x) then 1.0; the extra FSTP drops the 1.0.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8)); // fstp st
  ins_pipe( pipe_slow );
%}
10704 10715
// SSE2 tangent: bounce through the x87 stack for FPTAN; the FSTP drops the
// 1.0 that FPTAN pushes on top of the result.
instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8), // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10716 10727
// x87 two-argument arctangent: dst = atan2-style AtanD(dst, src) via FPATAN.
instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10726 10737
// SSE2 arctangent: bounce through the x87 stack for FPATAN.
instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10737 10748
// x87 double square root via FSQRT (D9 FA; opcode bytes emitted OpcS,OpcP).
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}
10747 10758
// x87 power: computes X^Y as 2^(Y*log2(X)). FYL2X forms Q = Y*log2(X);
// the shared pow_exp_core_encoding splits Q into integer and fractional
// parts, computes 2^frac(Q) with F2XM1, and scales by 2^int(Q) by building
// a scaling double in [ESP] using EAX/EBX/ECX (hence the KILLs).
// Result is left in FPR1 (the Y register).
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y)); // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
            "FLD_D $X\n\t"
            "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              Push_Reg_D(X),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10783 10794
// SSE2 power: same 2^(src1*log2(src0)) algorithm as powD_reg, but the XMM
// operands are pushed onto the x87 stack first and the x87 result is moved
// back to $dst. FPR1 plus EAX/EBX/ECX are clobbered by the exponent core.
instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
            "MOVSD [ESP],$src1\n\t"
            "FLD FPR1,$src1\n\t"
            "MOVSD [ESP],$src0\n\t"
            "FLD FPR1,$src0\n\t"
            "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10825 10836
10826 10837
// x87 exponential: computes e^x as 2^(x*log2(e)). FLDL2E/FMULP form
// Q = x*log2(e); the shared pow_exp_core_encoding then computes 2^Q
// (see powD_reg for the int/frac split and scaling through EAX/EBX/ECX).
// Operand and result live in FPR1.
instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rbx, KILL rcx);
  // Fixed: the first format line was missing its "\n\t" separator, which
  // made the disassembly print "SUB ESP,8...FLDL2E" on one line
  // (compare expXD_reg).
  format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP \t\t\t# Q=X*log2(e)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              Opcode(0xD9), Opcode(0xEA), // fldl2e
              Opcode(0xDE), Opcode(0xC9), // fmulp
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10862 10873
// SSE2 exponential: same 2^(x*log2(e)) algorithm as expD_reg, with the XMM
// source pushed onto the x87 stack first and the result moved back to $dst.
// FPR1 plus EAX/EBX/ECX are clobbered by the exponent core.
instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP \t\t\t# Q=X*log2(e) X\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8"
  %}
  ins_encode( Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xEA), // fldl2e
              Opcode(0xDE), Opcode(0xC9), // fmulp
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10901 10912
10902 10913
10903 10914
// x87 base-10 logarithm: log10(x) = log10(2) * log2(x) via FLDLG2/FYL2X.
instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}
10921 10932
// SSE2 base-10 logarithm: push log10(2), then the XMM source, then FYL2X;
// result moved back to $dst. No FXCH needed since operand order is chosen
// by the push sequence.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}
10938 10949
// x87 natural logarithm: ln(x) = ln(2) * log2(x) via FLDLN2/FYL2X.
instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}
10956 10967
// SSE2 natural logarithm: push ln(2), then the XMM source, then FYL2X;
// result moved back to $dst.
instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10973 10984
10974 10985 //-------------Float Instructions-------------------------------
10975 10986 // Float Math
10976 10987
10977 10988 // Code for float compare:
10978 10989 // fcompp();
10979 10990 // fwait(); fnstsw_ax();
10980 10991 // sahf();
10981 10992 // movl(dst, unordered_result);
10982 10993 // jcc(Assembler::parity, exit);
10983 10994 // movl(dst, less_result);
10984 10995 // jcc(Assembler::below, exit);
10985 10996 // movl(dst, equal_result);
10986 10997 // jcc(Assembler::equal, exit);
10987 10998 // movl(dst, greater_result);
10988 10999 // exit:
10989 11000
// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP; the fixup branch turns an unordered (NaN) result into
// "less than" by setting CF through AH/SAHF (clobbers EAX).
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
11008 11019
// P6 float compare when only CF-based conditions are consumed
// (eFlagsRegUCF): no NaN fixup needed, so cheaper than cmpF_cc_P6.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
11020 11031
11021 11032
// Compare & branch
// Pre-P6 float compare: FCOMp + FNSTSW/SAHF to move x87 status into
// EFLAGS, treating unordered (NaN) as "less than" (clobbers EAX).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
11041 11052
// Compare vs zero into -1,0,1
// x87 FTST against 0.0, with CmpF_Result materializing the three-way
// integer result in $dst (clobbers EAX and EFLAGS).
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11055 11066
// Compare into -1,0,1
// x87 three-way float compare; CmpF_Result turns the x87 status word
// into -1/0/1 in $dst (clobbers EAX and EFLAGS).
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11069 11080
// float compare and set condition codes in EFLAGS by XMM regs
// SSE COMISS with the shared NaN fixup (sets CF on unordered; kills EAX).
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}
11085 11096
// SSE COMISS when only CF-based conditions are consumed: no NaN fixup.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11095 11106
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: COMISS reg,mem plus NaN fixup.
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}
11111 11122
// Memory-operand COMISS for CF-only consumers: no NaN fixup needed.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11121 11132
// Compare into -1,0,1 in XMM
// Three-way SSE compare: COMISS then branch tree mapping unordered/less
// to -1, equal to 0, greater to 1 in $dst.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISS $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11142 11153
// Compare into -1,0,1 in XMM and memory
// Memory-operand form of cmpX_reg; $dst is zeroed with MOV (not XOR)
// after the compare so the flags from COMISS are preserved.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11163 11174
// Spill to obtain 24-bit precision
// x87 float subtract that rounds to 24 bits by spilling to a stack slot.
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// In-place x87 float subtract: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11188 11199
// Spill to obtain 24-bit precision
// x87 float add that rounds to 24 bits by spilling to a stack slot.
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// In-place x87 float add: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11214 11225
// Add two single precision floating point values in xmm
// ADDSS (F3 0F 58): dst = dst + src.
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11223 11234
// ADDSS with a float constant: the immediate is materialized in the
// nmethod's constant table and addressed via $constantaddress (6961690).
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11231 11244
// ADDSS with a memory operand folded from a LoadF.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11239 11252
// Subtract two single precision floating point values in xmm
// SUBSS (F3 0F 5C): dst = dst - src.
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11248 11261
// SUBSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11256 11271
// SUBSS with a memory operand folded from a LoadF.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11264 11279
// Multiply two single precision floating point values in xmm
// MULSS (F3 0F 59): dst = dst * src.
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11273 11288
// MULSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11281 11298
// MULSS with a memory operand folded from a LoadF.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11289 11306
// Divide two single precision floating point values in xmm
// DIVSS (F3 0F 5E): dst = dst / src.
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11298 11315
// DIVSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11306 11325
// DIVSS with a memory operand folded from a LoadF.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11314 11333
// Get the square root of a single precision floating point values in xmm
// Matches the D2F(SqrtD(F2D x)) pattern so a single SQRTSS replaces the
// widen/sqrt/narrow sequence.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11323 11342
// SQRTSS with the float source folded from memory.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11331 11350
// Get the square root of a double precision floating point values in xmm
// SQRTSD (F2 0F 51).
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11340 11359
// SQRTSD with the double source folded from memory.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11348 11367
// x87 float absolute value: operand in FPR1 (ST0); raw FABS (D9 E1).
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
11358 11377
// SSE float absolute value: ANDPS with a sign-bit-clearing mask.
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11366 11385
11367 11386 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11368 11387 predicate(UseSSE==0);
11369 11388 match(Set dst (NegF src));
11370 11389 ins_cost(100);
11371 11390 format %{ "FCHS" %}
11372 11391 opcode(0xE0, 0xD9);
11373 11392 ins_encode( OpcS, OpcP );
11374 11393 ins_pipe( fpu_reg_reg );
11375 11394 %}
11376 11395
11377 11396 instruct negX_reg( regX dst ) %{
11378 11397 predicate(UseSSE>=1);
11379 11398 match(Set dst (NegF dst));
11380 11399 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11381 11400 ins_encode( NegXF_encoding(dst));
11382 11401 ins_pipe( pipe_slow );
11383 11402 %}
11384 11403
11385 11404 // Cisc-alternate to addF_reg
11386 11405 // Spill to obtain 24-bit precision
11387 11406 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11388 11407 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11389 11408 match(Set dst (AddF src1 (LoadF src2)));
11390 11409
11391 11410 format %{ "FLD $src2\n\t"
11392 11411 "FADD ST,$src1\n\t"
11393 11412 "FSTP_S $dst" %}
11394 11413 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11395 11414 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11396 11415 OpcReg_F(src1),
11397 11416 Pop_Mem_F(dst) );
11398 11417 ins_pipe( fpu_mem_reg_mem );
11399 11418 %}
11400 11419 //
11401 11420 // Cisc-alternate to addF_reg
11402 11421 // This instruction does not round to 24-bits
11403 11422 instruct addF_reg_mem(regF dst, memory src) %{
11404 11423 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11405 11424 match(Set dst (AddF dst (LoadF src)));
11406 11425
11407 11426 format %{ "FADD $dst,$src" %}
11408 11427 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11409 11428 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11410 11429 OpcP, RegOpc(dst) );
11411 11430 ins_pipe( fpu_reg_mem );
11412 11431 %}
11413 11432
11414 11433 // // Following two instructions for _222_mpegaudio
11415 11434 // Spill to obtain 24-bit precision
11416 11435 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11417 11436 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11418 11437 match(Set dst (AddF src1 src2));
11419 11438
11420 11439 format %{ "FADD $dst,$src1,$src2" %}
11421 11440 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11422 11441 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11423 11442 OpcReg_F(src2),
11424 11443 Pop_Mem_F(dst) );
11425 11444 ins_pipe( fpu_mem_reg_mem );
11426 11445 %}
11427 11446
11428 11447 // Cisc-spill variant
11429 11448 // Spill to obtain 24-bit precision
11430 11449 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11431 11450 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11432 11451 match(Set dst (AddF src1 (LoadF src2)));
11433 11452
11434 11453 format %{ "FADD $dst,$src1,$src2 cisc" %}
11435 11454 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11436 11455 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11437 11456 set_instruction_start,
11438 11457 OpcP, RMopc_Mem(secondary,src1),
11439 11458 Pop_Mem_F(dst) );
11440 11459 ins_pipe( fpu_mem_mem_mem );
11441 11460 %}
11442 11461
11443 11462 // Spill to obtain 24-bit precision
11444 11463 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11445 11464 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11446 11465 match(Set dst (AddF src1 src2));
11447 11466
11448 11467 format %{ "FADD $dst,$src1,$src2" %}
↓ open down ↓ |
134 lines elided |
↑ open up ↑ |
11449 11468 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11450 11469 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11451 11470 set_instruction_start,
11452 11471 OpcP, RMopc_Mem(secondary,src1),
11453 11472 Pop_Mem_F(dst) );
11454 11473 ins_pipe( fpu_mem_mem_mem );
11455 11474 %}
11456 11475
11457 11476
11458 11477 // Spill to obtain 24-bit precision
11459 -instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11478 +instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11460 11479 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11461 - match(Set dst (AddF src1 src2));
11462 - format %{ "FLD $src1\n\t"
11463 - "FADD $src2\n\t"
11480 + match(Set dst (AddF src con));
11481 + format %{ "FLD $src\n\t"
11482 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11464 11483 "FSTP_S $dst" %}
11465 - opcode(0xD8, 0x00); /* D8 /0 */
11466 - ins_encode( Push_Reg_F(src1),
11467 - Opc_MemImm_F(src2),
11468 - Pop_Mem_F(dst));
11469 - ins_pipe( fpu_mem_reg_con );
11484 + ins_encode %{
11485 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11486 + __ fadd_s($constantaddress($con));
11487 + __ fstp_s(Address(rsp, $dst$$disp));
11488 + %}
11489 + ins_pipe(fpu_mem_reg_con);
11470 11490 %}
11471 11491 //
11472 11492 // This instruction does not round to 24-bits
11473 -instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11493 +instruct addF_reg_imm(regF dst, regF src, immF con) %{
11474 11494 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11475 - match(Set dst (AddF src1 src2));
11476 - format %{ "FLD $src1\n\t"
11477 - "FADD $src2\n\t"
11478 - "FSTP_S $dst" %}
11479 - opcode(0xD8, 0x00); /* D8 /0 */
11480 - ins_encode( Push_Reg_F(src1),
11481 - Opc_MemImm_F(src2),
11482 - Pop_Reg_F(dst));
11483 - ins_pipe( fpu_reg_reg_con );
11495 + match(Set dst (AddF src con));
11496 + format %{ "FLD $src\n\t"
11497 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11498 + "FSTP $dst" %}
11499 + ins_encode %{
11500 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11501 + __ fadd_s($constantaddress($con));
11502 + __ fstp_d($dst$$reg);
11503 + %}
11504 + ins_pipe(fpu_reg_reg_con);
11484 11505 %}
11485 11506
11486 11507 // Spill to obtain 24-bit precision
11487 11508 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11488 11509 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11489 11510 match(Set dst (MulF src1 src2));
11490 11511
11491 11512 format %{ "FLD $src1\n\t"
11492 11513 "FMUL $src2\n\t"
11493 11514 "FSTP_S $dst" %}
11494 11515 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11495 11516 ins_encode( Push_Reg_F(src1),
11496 11517 OpcReg_F(src2),
11497 11518 Pop_Mem_F(dst) );
11498 11519 ins_pipe( fpu_mem_reg_reg );
11499 11520 %}
11500 11521 //
11501 11522 // This instruction does not round to 24-bits
11502 11523 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11503 11524 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11504 11525 match(Set dst (MulF src1 src2));
11505 11526
11506 11527 format %{ "FLD $src1\n\t"
11507 11528 "FMUL $src2\n\t"
11508 11529 "FSTP_S $dst" %}
11509 11530 opcode(0xD8, 0x1); /* D8 C8+i */
11510 11531 ins_encode( Push_Reg_F(src2),
11511 11532 OpcReg_F(src1),
11512 11533 Pop_Reg_F(dst) );
11513 11534 ins_pipe( fpu_reg_reg_reg );
11514 11535 %}
11515 11536
11516 11537
11517 11538 // Spill to obtain 24-bit precision
11518 11539 // Cisc-alternate to reg-reg multiply
11519 11540 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11520 11541 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11521 11542 match(Set dst (MulF src1 (LoadF src2)));
11522 11543
11523 11544 format %{ "FLD_S $src2\n\t"
11524 11545 "FMUL $src1\n\t"
11525 11546 "FSTP_S $dst" %}
11526 11547 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11527 11548 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11528 11549 OpcReg_F(src1),
11529 11550 Pop_Mem_F(dst) );
11530 11551 ins_pipe( fpu_mem_reg_mem );
11531 11552 %}
11532 11553 //
11533 11554 // This instruction does not round to 24-bits
11534 11555 // Cisc-alternate to reg-reg multiply
11535 11556 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11536 11557 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11537 11558 match(Set dst (MulF src1 (LoadF src2)));
11538 11559
11539 11560 format %{ "FMUL $dst,$src1,$src2" %}
11540 11561 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11541 11562 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11542 11563 OpcReg_F(src1),
11543 11564 Pop_Reg_F(dst) );
11544 11565 ins_pipe( fpu_reg_reg_mem );
11545 11566 %}
11546 11567
11547 11568 // Spill to obtain 24-bit precision
11548 11569 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11549 11570 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11550 11571 match(Set dst (MulF src1 src2));
11551 11572
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
11552 11573 format %{ "FMUL $dst,$src1,$src2" %}
11553 11574 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11554 11575 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11555 11576 set_instruction_start,
11556 11577 OpcP, RMopc_Mem(secondary,src1),
11557 11578 Pop_Mem_F(dst) );
11558 11579 ins_pipe( fpu_mem_mem_mem );
11559 11580 %}
11560 11581
11561 11582 // Spill to obtain 24-bit precision
11562 -instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11583 +instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11563 11584 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11564 - match(Set dst (MulF src1 src2));
11585 + match(Set dst (MulF src con));
11565 11586
11566 - format %{ "FMULc $dst,$src1,$src2" %}
11567 - opcode(0xD8, 0x1); /* D8 /1*/
11568 - ins_encode( Push_Reg_F(src1),
11569 - Opc_MemImm_F(src2),
11570 - Pop_Mem_F(dst));
11571 - ins_pipe( fpu_mem_reg_con );
11587 + format %{ "FLD $src\n\t"
11588 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11589 + "FSTP_S $dst" %}
11590 + ins_encode %{
11591 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11592 + __ fmul_s($constantaddress($con));
11593 + __ fstp_s(Address(rsp, $dst$$disp));
11594 + %}
11595 + ins_pipe(fpu_mem_reg_con);
11572 11596 %}
11573 11597 //
11574 11598 // This instruction does not round to 24-bits
11575 -instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11599 +instruct mulF_reg_imm(regF dst, regF src, immF con) %{
11576 11600 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11577 - match(Set dst (MulF src1 src2));
11601 + match(Set dst (MulF src con));
11578 11602
11579 - format %{ "FMULc $dst. $src1, $src2" %}
11580 - opcode(0xD8, 0x1); /* D8 /1*/
11581 - ins_encode( Push_Reg_F(src1),
11582 - Opc_MemImm_F(src2),
11583 - Pop_Reg_F(dst));
11584 - ins_pipe( fpu_reg_reg_con );
11603 + format %{ "FLD $src\n\t"
11604 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11605 + "FSTP $dst" %}
11606 + ins_encode %{
11607 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11608 + __ fmul_s($constantaddress($con));
11609 + __ fstp_d($dst$$reg);
11610 + %}
11611 + ins_pipe(fpu_reg_reg_con);
11585 11612 %}
11586 11613
11587 11614
11588 11615 //
11589 11616 // MACRO1 -- subsume unshared load into mulF
11590 11617 // This instruction does not round to 24-bits
11591 11618 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11592 11619 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11593 11620 match(Set dst (MulF (LoadF mem1) src));
11594 11621
11595 11622 format %{ "FLD $mem1 ===MACRO1===\n\t"
11596 11623 "FMUL ST,$src\n\t"
11597 11624 "FSTP $dst" %}
11598 11625 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11599 11626 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11600 11627 OpcReg_F(src),
11601 11628 Pop_Reg_F(dst) );
11602 11629 ins_pipe( fpu_reg_reg_mem );
11603 11630 %}
11604 11631 //
11605 11632 // MACRO2 -- addF a mulF which subsumed an unshared load
11606 11633 // This instruction does not round to 24-bits
11607 11634 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11608 11635 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11609 11636 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11610 11637 ins_cost(95);
11611 11638
11612 11639 format %{ "FLD $mem1 ===MACRO2===\n\t"
11613 11640 "FMUL ST,$src1 subsume mulF left load\n\t"
11614 11641 "FADD ST,$src2\n\t"
11615 11642 "FSTP $dst" %}
11616 11643 opcode(0xD9); /* LoadF D9 /0 */
11617 11644 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11618 11645 FMul_ST_reg(src1),
11619 11646 FAdd_ST_reg(src2),
11620 11647 Pop_Reg_F(dst) );
11621 11648 ins_pipe( fpu_reg_mem_reg_reg );
11622 11649 %}
11623 11650
11624 11651 // MACRO3 -- addF a mulF
11625 11652 // This instruction does not round to 24-bits. It is a '2-address'
11626 11653 // instruction in that the result goes back to src2. This eliminates
11627 11654 // a move from the macro; possibly the register allocator will have
11628 11655 // to add it back (and maybe not).
11629 11656 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11630 11657 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11631 11658 match(Set src2 (AddF (MulF src0 src1) src2));
11632 11659
11633 11660 format %{ "FLD $src0 ===MACRO3===\n\t"
11634 11661 "FMUL ST,$src1\n\t"
11635 11662 "FADDP $src2,ST" %}
11636 11663 opcode(0xD9); /* LoadF D9 /0 */
11637 11664 ins_encode( Push_Reg_F(src0),
11638 11665 FMul_ST_reg(src1),
11639 11666 FAddP_reg_ST(src2) );
11640 11667 ins_pipe( fpu_reg_reg_reg );
11641 11668 %}
11642 11669
11643 11670 // MACRO4 -- divF subF
11644 11671 // This instruction does not round to 24-bits
11645 11672 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11646 11673 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11647 11674 match(Set dst (DivF (SubF src2 src1) src3));
11648 11675
11649 11676 format %{ "FLD $src2 ===MACRO4===\n\t"
11650 11677 "FSUB ST,$src1\n\t"
11651 11678 "FDIV ST,$src3\n\t"
11652 11679 "FSTP $dst" %}
11653 11680 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11654 11681 ins_encode( Push_Reg_F(src2),
11655 11682 subF_divF_encode(src1,src3),
11656 11683 Pop_Reg_F(dst) );
11657 11684 ins_pipe( fpu_reg_reg_reg_reg );
11658 11685 %}
11659 11686
11660 11687 // Spill to obtain 24-bit precision
11661 11688 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11662 11689 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11663 11690 match(Set dst (DivF src1 src2));
11664 11691
11665 11692 format %{ "FDIV $dst,$src1,$src2" %}
11666 11693 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11667 11694 ins_encode( Push_Reg_F(src1),
11668 11695 OpcReg_F(src2),
11669 11696 Pop_Mem_F(dst) );
11670 11697 ins_pipe( fpu_mem_reg_reg );
11671 11698 %}
11672 11699 //
11673 11700 // This instruction does not round to 24-bits
11674 11701 instruct divF_reg(regF dst, regF src) %{
11675 11702 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11676 11703 match(Set dst (DivF dst src));
11677 11704
11678 11705 format %{ "FDIV $dst,$src" %}
11679 11706 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11680 11707 ins_encode( Push_Reg_F(src),
11681 11708 OpcP, RegOpc(dst) );
11682 11709 ins_pipe( fpu_reg_reg );
11683 11710 %}
11684 11711
11685 11712
11686 11713 // Spill to obtain 24-bit precision
11687 11714 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11688 11715 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11689 11716 match(Set dst (ModF src1 src2));
11690 11717 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11691 11718
11692 11719 format %{ "FMOD $dst,$src1,$src2" %}
11693 11720 ins_encode( Push_Reg_Mod_D(src1, src2),
11694 11721 emitModD(),
11695 11722 Push_Result_Mod_D(src2),
11696 11723 Pop_Mem_F(dst));
11697 11724 ins_pipe( pipe_slow );
11698 11725 %}
11699 11726 //
11700 11727 // This instruction does not round to 24-bits
11701 11728 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11702 11729 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11703 11730 match(Set dst (ModF dst src));
11704 11731 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11705 11732
11706 11733 format %{ "FMOD $dst,$src" %}
11707 11734 ins_encode(Push_Reg_Mod_D(dst, src),
11708 11735 emitModD(),
11709 11736 Push_Result_Mod_D(src),
11710 11737 Pop_Reg_F(dst));
11711 11738 ins_pipe( pipe_slow );
11712 11739 %}
11713 11740
11714 11741 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11715 11742 predicate(UseSSE>=1);
11716 11743 match(Set dst (ModF src0 src1));
11717 11744 effect(KILL rax, KILL cr);
11718 11745 format %{ "SUB ESP,4\t # FMOD\n"
11719 11746 "\tMOVSS [ESP+0],$src1\n"
11720 11747 "\tFLD_S [ESP+0]\n"
11721 11748 "\tMOVSS [ESP+0],$src0\n"
11722 11749 "\tFLD_S [ESP+0]\n"
11723 11750 "loop:\tFPREM\n"
11724 11751 "\tFWAIT\n"
11725 11752 "\tFNSTSW AX\n"
11726 11753 "\tSAHF\n"
11727 11754 "\tJP loop\n"
11728 11755 "\tFSTP_S [ESP+0]\n"
11729 11756 "\tMOVSS $dst,[ESP+0]\n"
11730 11757 "\tADD ESP,4\n"
11731 11758 "\tFSTP ST0\t # Restore FPU Stack"
11732 11759 %}
11733 11760 ins_cost(250);
11734 11761 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11735 11762 ins_pipe( pipe_slow );
11736 11763 %}
11737 11764
11738 11765
11739 11766 //----------Arithmetic Conversion Instructions---------------------------------
11740 11767 // The conversions operations are all Alpha sorted. Please keep it that way!
11741 11768
11742 11769 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11743 11770 predicate(UseSSE==0);
11744 11771 match(Set dst (RoundFloat src));
11745 11772 ins_cost(125);
11746 11773 format %{ "FST_S $dst,$src\t# F-round" %}
11747 11774 ins_encode( Pop_Mem_Reg_F(dst, src) );
11748 11775 ins_pipe( fpu_mem_reg );
11749 11776 %}
11750 11777
11751 11778 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11752 11779 predicate(UseSSE<=1);
11753 11780 match(Set dst (RoundDouble src));
11754 11781 ins_cost(125);
11755 11782 format %{ "FST_D $dst,$src\t# D-round" %}
11756 11783 ins_encode( Pop_Mem_Reg_D(dst, src) );
11757 11784 ins_pipe( fpu_mem_reg );
11758 11785 %}
11759 11786
11760 11787 // Force rounding to 24-bit precision and 6-bit exponent
11761 11788 instruct convD2F_reg(stackSlotF dst, regD src) %{
11762 11789 predicate(UseSSE==0);
11763 11790 match(Set dst (ConvD2F src));
11764 11791 format %{ "FST_S $dst,$src\t# F-round" %}
11765 11792 expand %{
11766 11793 roundFloat_mem_reg(dst,src);
11767 11794 %}
11768 11795 %}
11769 11796
11770 11797 // Force rounding to 24-bit precision and 6-bit exponent
11771 11798 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11772 11799 predicate(UseSSE==1);
11773 11800 match(Set dst (ConvD2F src));
11774 11801 effect( KILL cr );
11775 11802 format %{ "SUB ESP,4\n\t"
11776 11803 "FST_S [ESP],$src\t# F-round\n\t"
11777 11804 "MOVSS $dst,[ESP]\n\t"
11778 11805 "ADD ESP,4" %}
11779 11806 ins_encode( D2X_encoding(dst, src) );
11780 11807 ins_pipe( pipe_slow );
11781 11808 %}
11782 11809
11783 11810 // Force rounding double precision to single precision
11784 11811 instruct convXD2X_reg(regX dst, regXD src) %{
11785 11812 predicate(UseSSE>=2);
11786 11813 match(Set dst (ConvD2F src));
11787 11814 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11788 11815 opcode(0xF2, 0x0F, 0x5A);
11789 11816 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11790 11817 ins_pipe( pipe_slow );
11791 11818 %}
11792 11819
11793 11820 instruct convF2D_reg_reg(regD dst, regF src) %{
11794 11821 predicate(UseSSE==0);
11795 11822 match(Set dst (ConvF2D src));
11796 11823 format %{ "FST_S $dst,$src\t# D-round" %}
11797 11824 ins_encode( Pop_Reg_Reg_D(dst, src));
11798 11825 ins_pipe( fpu_reg_reg );
11799 11826 %}
11800 11827
11801 11828 instruct convF2D_reg(stackSlotD dst, regF src) %{
11802 11829 predicate(UseSSE==1);
11803 11830 match(Set dst (ConvF2D src));
11804 11831 format %{ "FST_D $dst,$src\t# D-round" %}
11805 11832 expand %{
11806 11833 roundDouble_mem_reg(dst,src);
11807 11834 %}
11808 11835 %}
11809 11836
11810 11837 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11811 11838 predicate(UseSSE==1);
11812 11839 match(Set dst (ConvF2D src));
11813 11840 effect( KILL cr );
11814 11841 format %{ "SUB ESP,4\n\t"
11815 11842 "MOVSS [ESP] $src\n\t"
11816 11843 "FLD_S [ESP]\n\t"
11817 11844 "ADD ESP,4\n\t"
11818 11845 "FSTP $dst\t# D-round" %}
11819 11846 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11820 11847 ins_pipe( pipe_slow );
11821 11848 %}
11822 11849
11823 11850 instruct convX2XD_reg(regXD dst, regX src) %{
11824 11851 predicate(UseSSE>=2);
11825 11852 match(Set dst (ConvF2D src));
11826 11853 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11827 11854 opcode(0xF3, 0x0F, 0x5A);
11828 11855 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11829 11856 ins_pipe( pipe_slow );
11830 11857 %}
11831 11858
11832 11859 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11833 11860 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11834 11861 predicate(UseSSE<=1);
11835 11862 match(Set dst (ConvD2I src));
11836 11863 effect( KILL tmp, KILL cr );
11837 11864 format %{ "FLD $src\t# Convert double to int \n\t"
11838 11865 "FLDCW trunc mode\n\t"
11839 11866 "SUB ESP,4\n\t"
11840 11867 "FISTp [ESP + #0]\n\t"
11841 11868 "FLDCW std/24-bit mode\n\t"
11842 11869 "POP EAX\n\t"
11843 11870 "CMP EAX,0x80000000\n\t"
11844 11871 "JNE,s fast\n\t"
11845 11872 "FLD_D $src\n\t"
11846 11873 "CALL d2i_wrapper\n"
11847 11874 "fast:" %}
11848 11875 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11849 11876 ins_pipe( pipe_slow );
11850 11877 %}
11851 11878
11852 11879 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11853 11880 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11854 11881 predicate(UseSSE>=2);
11855 11882 match(Set dst (ConvD2I src));
11856 11883 effect( KILL tmp, KILL cr );
11857 11884 format %{ "CVTTSD2SI $dst, $src\n\t"
11858 11885 "CMP $dst,0x80000000\n\t"
11859 11886 "JNE,s fast\n\t"
11860 11887 "SUB ESP, 8\n\t"
11861 11888 "MOVSD [ESP], $src\n\t"
11862 11889 "FLD_D [ESP]\n\t"
11863 11890 "ADD ESP, 8\n\t"
11864 11891 "CALL d2i_wrapper\n"
11865 11892 "fast:" %}
11866 11893 opcode(0x1); // double-precision conversion
11867 11894 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11868 11895 ins_pipe( pipe_slow );
11869 11896 %}
11870 11897
11871 11898 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11872 11899 predicate(UseSSE<=1);
11873 11900 match(Set dst (ConvD2L src));
11874 11901 effect( KILL cr );
11875 11902 format %{ "FLD $src\t# Convert double to long\n\t"
11876 11903 "FLDCW trunc mode\n\t"
11877 11904 "SUB ESP,8\n\t"
11878 11905 "FISTp [ESP + #0]\n\t"
11879 11906 "FLDCW std/24-bit mode\n\t"
11880 11907 "POP EAX\n\t"
11881 11908 "POP EDX\n\t"
11882 11909 "CMP EDX,0x80000000\n\t"
11883 11910 "JNE,s fast\n\t"
11884 11911 "TEST EAX,EAX\n\t"
11885 11912 "JNE,s fast\n\t"
11886 11913 "FLD $src\n\t"
11887 11914 "CALL d2l_wrapper\n"
11888 11915 "fast:" %}
11889 11916 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11890 11917 ins_pipe( pipe_slow );
11891 11918 %}
11892 11919
11893 11920 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11894 11921 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11895 11922 predicate (UseSSE>=2);
11896 11923 match(Set dst (ConvD2L src));
11897 11924 effect( KILL cr );
11898 11925 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11899 11926 "MOVSD [ESP],$src\n\t"
11900 11927 "FLD_D [ESP]\n\t"
11901 11928 "FLDCW trunc mode\n\t"
11902 11929 "FISTp [ESP + #0]\n\t"
11903 11930 "FLDCW std/24-bit mode\n\t"
11904 11931 "POP EAX\n\t"
11905 11932 "POP EDX\n\t"
11906 11933 "CMP EDX,0x80000000\n\t"
11907 11934 "JNE,s fast\n\t"
11908 11935 "TEST EAX,EAX\n\t"
11909 11936 "JNE,s fast\n\t"
11910 11937 "SUB ESP,8\n\t"
11911 11938 "MOVSD [ESP],$src\n\t"
11912 11939 "FLD_D [ESP]\n\t"
11913 11940 "CALL d2l_wrapper\n"
11914 11941 "fast:" %}
11915 11942 ins_encode( XD2L_encoding(src) );
11916 11943 ins_pipe( pipe_slow );
11917 11944 %}
11918 11945
11919 11946 // Convert a double to an int. Java semantics require we do complex
11920 11947 // manglations in the corner cases. So we set the rounding mode to
11921 11948 // 'zero', store the darned double down as an int, and reset the
11922 11949 // rounding mode to 'nearest'. The hardware stores a flag value down
11923 11950 // if we would overflow or converted a NAN; we check for this and
11924 11951 // and go the slow path if needed.
11925 11952 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11926 11953 predicate(UseSSE==0);
11927 11954 match(Set dst (ConvF2I src));
11928 11955 effect( KILL tmp, KILL cr );
11929 11956 format %{ "FLD $src\t# Convert float to int \n\t"
11930 11957 "FLDCW trunc mode\n\t"
11931 11958 "SUB ESP,4\n\t"
11932 11959 "FISTp [ESP + #0]\n\t"
11933 11960 "FLDCW std/24-bit mode\n\t"
11934 11961 "POP EAX\n\t"
11935 11962 "CMP EAX,0x80000000\n\t"
11936 11963 "JNE,s fast\n\t"
11937 11964 "FLD $src\n\t"
11938 11965 "CALL d2i_wrapper\n"
11939 11966 "fast:" %}
11940 11967 // D2I_encoding works for F2I
11941 11968 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11942 11969 ins_pipe( pipe_slow );
11943 11970 %}
11944 11971
11945 11972 // Convert a float in xmm to an int reg.
11946 11973 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11947 11974 predicate(UseSSE>=1);
11948 11975 match(Set dst (ConvF2I src));
11949 11976 effect( KILL tmp, KILL cr );
11950 11977 format %{ "CVTTSS2SI $dst, $src\n\t"
11951 11978 "CMP $dst,0x80000000\n\t"
11952 11979 "JNE,s fast\n\t"
11953 11980 "SUB ESP, 4\n\t"
11954 11981 "MOVSS [ESP], $src\n\t"
11955 11982 "FLD [ESP]\n\t"
11956 11983 "ADD ESP, 4\n\t"
11957 11984 "CALL d2i_wrapper\n"
11958 11985 "fast:" %}
11959 11986 opcode(0x0); // single-precision conversion
11960 11987 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11961 11988 ins_pipe( pipe_slow );
11962 11989 %}
11963 11990
11964 11991 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11965 11992 predicate(UseSSE==0);
11966 11993 match(Set dst (ConvF2L src));
11967 11994 effect( KILL cr );
11968 11995 format %{ "FLD $src\t# Convert float to long\n\t"
11969 11996 "FLDCW trunc mode\n\t"
11970 11997 "SUB ESP,8\n\t"
11971 11998 "FISTp [ESP + #0]\n\t"
11972 11999 "FLDCW std/24-bit mode\n\t"
11973 12000 "POP EAX\n\t"
11974 12001 "POP EDX\n\t"
11975 12002 "CMP EDX,0x80000000\n\t"
11976 12003 "JNE,s fast\n\t"
11977 12004 "TEST EAX,EAX\n\t"
11978 12005 "JNE,s fast\n\t"
11979 12006 "FLD $src\n\t"
11980 12007 "CALL d2l_wrapper\n"
11981 12008 "fast:" %}
11982 12009 // D2L_encoding works for F2L
11983 12010 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11984 12011 ins_pipe( pipe_slow );
11985 12012 %}
11986 12013
11987 12014 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11988 12015 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11989 12016 predicate (UseSSE>=1);
11990 12017 match(Set dst (ConvF2L src));
11991 12018 effect( KILL cr );
11992 12019 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11993 12020 "MOVSS [ESP],$src\n\t"
11994 12021 "FLD_S [ESP]\n\t"
11995 12022 "FLDCW trunc mode\n\t"
11996 12023 "FISTp [ESP + #0]\n\t"
11997 12024 "FLDCW std/24-bit mode\n\t"
11998 12025 "POP EAX\n\t"
11999 12026 "POP EDX\n\t"
12000 12027 "CMP EDX,0x80000000\n\t"
12001 12028 "JNE,s fast\n\t"
12002 12029 "TEST EAX,EAX\n\t"
12003 12030 "JNE,s fast\n\t"
12004 12031 "SUB ESP,4\t# Convert float to long\n\t"
12005 12032 "MOVSS [ESP],$src\n\t"
12006 12033 "FLD_S [ESP]\n\t"
12007 12034 "ADD ESP,4\n\t"
12008 12035 "CALL d2l_wrapper\n"
12009 12036 "fast:" %}
12010 12037 ins_encode( X2L_encoding(src) );
12011 12038 ins_pipe( pipe_slow );
12012 12039 %}
12013 12040
12014 12041 instruct convI2D_reg(regD dst, stackSlotI src) %{
12015 12042 predicate( UseSSE<=1 );
12016 12043 match(Set dst (ConvI2D src));
12017 12044 format %{ "FILD $src\n\t"
12018 12045 "FSTP $dst" %}
12019 12046 opcode(0xDB, 0x0); /* DB /0 */
12020 12047 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
12021 12048 ins_pipe( fpu_reg_mem );
12022 12049 %}
12023 12050
12024 12051 instruct convI2XD_reg(regXD dst, eRegI src) %{
12025 12052 predicate( UseSSE>=2 && !UseXmmI2D );
12026 12053 match(Set dst (ConvI2D src));
12027 12054 format %{ "CVTSI2SD $dst,$src" %}
12028 12055 opcode(0xF2, 0x0F, 0x2A);
12029 12056 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12030 12057 ins_pipe( pipe_slow );
12031 12058 %}
12032 12059
12033 12060 instruct convI2XD_mem(regXD dst, memory mem) %{
12034 12061 predicate( UseSSE>=2 );
12035 12062 match(Set dst (ConvI2D (LoadI mem)));
12036 12063 format %{ "CVTSI2SD $dst,$mem" %}
12037 12064 opcode(0xF2, 0x0F, 0x2A);
12038 12065 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
12039 12066 ins_pipe( pipe_slow );
12040 12067 %}
12041 12068
12042 12069 instruct convXI2XD_reg(regXD dst, eRegI src)
12043 12070 %{
12044 12071 predicate( UseSSE>=2 && UseXmmI2D );
12045 12072 match(Set dst (ConvI2D src));
12046 12073
12047 12074 format %{ "MOVD $dst,$src\n\t"
12048 12075 "CVTDQ2PD $dst,$dst\t# i2d" %}
12049 12076 ins_encode %{
12050 12077 __ movdl($dst$$XMMRegister, $src$$Register);
12051 12078 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12052 12079 %}
12053 12080 ins_pipe(pipe_slow); // XXX
12054 12081 %}
12055 12082
12056 12083 instruct convI2D_mem(regD dst, memory mem) %{
12057 12084 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12058 12085 match(Set dst (ConvI2D (LoadI mem)));
12059 12086 format %{ "FILD $mem\n\t"
12060 12087 "FSTP $dst" %}
12061 12088 opcode(0xDB); /* DB /0 */
12062 12089 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12063 12090 Pop_Reg_D(dst));
12064 12091 ins_pipe( fpu_reg_mem );
12065 12092 %}
12066 12093
12067 12094 // Convert a byte to a float; no rounding step needed.
12068 12095 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
12069 12096 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12070 12097 match(Set dst (ConvI2F src));
12071 12098 format %{ "FILD $src\n\t"
12072 12099 "FSTP $dst" %}
12073 12100
12074 12101 opcode(0xDB, 0x0); /* DB /0 */
12075 12102 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
12076 12103 ins_pipe( fpu_reg_mem );
12077 12104 %}
12078 12105
12079 12106 // In 24-bit mode, force exponent rounding by storing back out
12080 12107 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
12081 12108 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12082 12109 match(Set dst (ConvI2F src));
12083 12110 ins_cost(200);
12084 12111 format %{ "FILD $src\n\t"
12085 12112 "FSTP_S $dst" %}
12086 12113 opcode(0xDB, 0x0); /* DB /0 */
12087 12114 ins_encode( Push_Mem_I(src),
12088 12115 Pop_Mem_F(dst));
12089 12116 ins_pipe( fpu_mem_mem );
12090 12117 %}
12091 12118
12092 12119 // In 24-bit mode, force exponent rounding by storing back out
12093 12120 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
12094 12121 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12095 12122 match(Set dst (ConvI2F (LoadI mem)));
12096 12123 ins_cost(200);
12097 12124 format %{ "FILD $mem\n\t"
12098 12125 "FSTP_S $dst" %}
12099 12126 opcode(0xDB); /* DB /0 */
12100 12127 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12101 12128 Pop_Mem_F(dst));
12102 12129 ins_pipe( fpu_mem_mem );
12103 12130 %}
12104 12131
12105 12132 // This instruction does not round to 24-bits
12106 12133 instruct convI2F_reg(regF dst, stackSlotI src) %{
12107 12134 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12108 12135 match(Set dst (ConvI2F src));
12109 12136 format %{ "FILD $src\n\t"
12110 12137 "FSTP $dst" %}
12111 12138 opcode(0xDB, 0x0); /* DB /0 */
12112 12139 ins_encode( Push_Mem_I(src),
12113 12140 Pop_Reg_F(dst));
12114 12141 ins_pipe( fpu_reg_mem );
12115 12142 %}
12116 12143
12117 12144 // This instruction does not round to 24-bits
12118 12145 instruct convI2F_mem(regF dst, memory mem) %{
12119 12146 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12120 12147 match(Set dst (ConvI2F (LoadI mem)));
12121 12148 format %{ "FILD $mem\n\t"
12122 12149 "FSTP $dst" %}
12123 12150 opcode(0xDB); /* DB /0 */
12124 12151 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12125 12152 Pop_Reg_F(dst));
12126 12153 ins_pipe( fpu_reg_mem );
12127 12154 %}
12128 12155
12129 12156 // Convert an int to a float in xmm; no rounding step needed.
12130 12157 instruct convI2X_reg(regX dst, eRegI src) %{
12131 12158 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
12132 12159 match(Set dst (ConvI2F src));
12133 12160 format %{ "CVTSI2SS $dst, $src" %}
12134 12161
12135 12162 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
12136 12163 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12137 12164 ins_pipe( pipe_slow );
12138 12165 %}
12139 12166
12140 12167 instruct convXI2X_reg(regX dst, eRegI src)
12141 12168 %{
12142 12169 predicate( UseSSE>=2 && UseXmmI2F );
12143 12170 match(Set dst (ConvI2F src));
12144 12171
12145 12172 format %{ "MOVD $dst,$src\n\t"
12146 12173 "CVTDQ2PS $dst,$dst\t# i2f" %}
12147 12174 ins_encode %{
12148 12175 __ movdl($dst$$XMMRegister, $src$$Register);
12149 12176 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
12150 12177 %}
12151 12178 ins_pipe(pipe_slow); // XXX
12152 12179 %}
12153 12180
12154 12181 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12155 12182 match(Set dst (ConvI2L src));
12156 12183 effect(KILL cr);
12157 12184 ins_cost(375);
12158 12185 format %{ "MOV $dst.lo,$src\n\t"
12159 12186 "MOV $dst.hi,$src\n\t"
12160 12187 "SAR $dst.hi,31" %}
12161 12188 ins_encode(convert_int_long(dst,src));
12162 12189 ins_pipe( ialu_reg_reg_long );
12163 12190 %}
12164 12191
12165 12192 // Zero-extend convert int to long
12166 12193 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
12167 12194 match(Set dst (AndL (ConvI2L src) mask) );
12168 12195 effect( KILL flags );
12169 12196 ins_cost(250);
12170 12197 format %{ "MOV $dst.lo,$src\n\t"
12171 12198 "XOR $dst.hi,$dst.hi" %}
12172 12199 opcode(0x33); // XOR
12173 12200 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12174 12201 ins_pipe( ialu_reg_reg_long );
12175 12202 %}
12176 12203
12177 12204 // Zero-extend long
12178 12205 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
12179 12206 match(Set dst (AndL src mask) );
12180 12207 effect( KILL flags );
12181 12208 ins_cost(250);
12182 12209 format %{ "MOV $dst.lo,$src.lo\n\t"
12183 12210 "XOR $dst.hi,$dst.hi\n\t" %}
12184 12211 opcode(0x33); // XOR
12185 12212 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12186 12213 ins_pipe( ialu_reg_reg_long );
12187 12214 %}
12188 12215
12189 12216 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
12190 12217 predicate (UseSSE<=1);
12191 12218 match(Set dst (ConvL2D src));
12192 12219 effect( KILL cr );
12193 12220 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12194 12221 "PUSH $src.lo\n\t"
12195 12222 "FILD ST,[ESP + #0]\n\t"
12196 12223 "ADD ESP,8\n\t"
12197 12224 "FSTP_D $dst\t# D-round" %}
12198 12225 opcode(0xDF, 0x5); /* DF /5 */
12199 12226 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
12200 12227 ins_pipe( pipe_slow );
12201 12228 %}
12202 12229
12203 12230 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
12204 12231 predicate (UseSSE>=2);
12205 12232 match(Set dst (ConvL2D src));
12206 12233 effect( KILL cr );
12207 12234 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12208 12235 "PUSH $src.lo\n\t"
12209 12236 "FILD_D [ESP]\n\t"
12210 12237 "FSTP_D [ESP]\n\t"
12211 12238 "MOVSD $dst,[ESP]\n\t"
12212 12239 "ADD ESP,8" %}
12213 12240 opcode(0xDF, 0x5); /* DF /5 */
12214 12241 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
12215 12242 ins_pipe( pipe_slow );
12216 12243 %}
12217 12244
12218 12245 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
12219 12246 predicate (UseSSE>=1);
12220 12247 match(Set dst (ConvL2F src));
12221 12248 effect( KILL cr );
12222 12249 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12223 12250 "PUSH $src.lo\n\t"
12224 12251 "FILD_D [ESP]\n\t"
12225 12252 "FSTP_S [ESP]\n\t"
12226 12253 "MOVSS $dst,[ESP]\n\t"
12227 12254 "ADD ESP,8" %}
12228 12255 opcode(0xDF, 0x5); /* DF /5 */
12229 12256 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
12230 12257 ins_pipe( pipe_slow );
12231 12258 %}
12232 12259
12233 12260 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
12234 12261 match(Set dst (ConvL2F src));
12235 12262 effect( KILL cr );
12236 12263 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12237 12264 "PUSH $src.lo\n\t"
12238 12265 "FILD ST,[ESP + #0]\n\t"
12239 12266 "ADD ESP,8\n\t"
12240 12267 "FSTP_S $dst\t# F-round" %}
12241 12268 opcode(0xDF, 0x5); /* DF /5 */
12242 12269 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12243 12270 ins_pipe( pipe_slow );
12244 12271 %}
12245 12272
12246 12273 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12247 12274 match(Set dst (ConvL2I src));
12248 12275 effect( DEF dst, USE src );
12249 12276 format %{ "MOV $dst,$src.lo" %}
12250 12277 ins_encode(enc_CopyL_Lo(dst,src));
12251 12278 ins_pipe( ialu_reg_reg );
12252 12279 %}
12253 12280
12254 12281
12255 12282 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12256 12283 match(Set dst (MoveF2I src));
12257 12284 effect( DEF dst, USE src );
12258 12285 ins_cost(100);
12259 12286 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12260 12287 opcode(0x8B);
12261 12288 ins_encode( OpcP, RegMem(dst,src));
12262 12289 ins_pipe( ialu_reg_mem );
12263 12290 %}
12264 12291
12265 12292 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12266 12293 predicate(UseSSE==0);
12267 12294 match(Set dst (MoveF2I src));
12268 12295 effect( DEF dst, USE src );
12269 12296
12270 12297 ins_cost(125);
12271 12298 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12272 12299 ins_encode( Pop_Mem_Reg_F(dst, src) );
12273 12300 ins_pipe( fpu_mem_reg );
12274 12301 %}
12275 12302
12276 12303 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12277 12304 predicate(UseSSE>=1);
12278 12305 match(Set dst (MoveF2I src));
12279 12306 effect( DEF dst, USE src );
12280 12307
12281 12308 ins_cost(95);
12282 12309 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12283 12310 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
12284 12311 ins_pipe( pipe_slow );
12285 12312 %}
12286 12313
12287 12314 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12288 12315 predicate(UseSSE>=2);
12289 12316 match(Set dst (MoveF2I src));
12290 12317 effect( DEF dst, USE src );
12291 12318 ins_cost(85);
12292 12319 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12293 12320 ins_encode( MovX2I_reg(dst, src));
12294 12321 ins_pipe( pipe_slow );
12295 12322 %}
12296 12323
12297 12324 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12298 12325 match(Set dst (MoveI2F src));
12299 12326 effect( DEF dst, USE src );
12300 12327
12301 12328 ins_cost(100);
12302 12329 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12303 12330 opcode(0x89);
12304 12331 ins_encode( OpcPRegSS( dst, src ) );
12305 12332 ins_pipe( ialu_mem_reg );
12306 12333 %}
12307 12334
12308 12335
12309 12336 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12310 12337 predicate(UseSSE==0);
12311 12338 match(Set dst (MoveI2F src));
12312 12339 effect(DEF dst, USE src);
12313 12340
12314 12341 ins_cost(125);
12315 12342 format %{ "FLD_S $src\n\t"
12316 12343 "FSTP $dst\t# MoveI2F_stack_reg" %}
12317 12344 opcode(0xD9); /* D9 /0, FLD m32real */
12318 12345 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12319 12346 Pop_Reg_F(dst) );
12320 12347 ins_pipe( fpu_reg_mem );
12321 12348 %}
12322 12349
12323 12350 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12324 12351 predicate(UseSSE>=1);
12325 12352 match(Set dst (MoveI2F src));
12326 12353 effect( DEF dst, USE src );
12327 12354
12328 12355 ins_cost(95);
12329 12356 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12330 12357 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12331 12358 ins_pipe( pipe_slow );
12332 12359 %}
12333 12360
12334 12361 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12335 12362 predicate(UseSSE>=2);
12336 12363 match(Set dst (MoveI2F src));
12337 12364 effect( DEF dst, USE src );
12338 12365
12339 12366 ins_cost(85);
12340 12367 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12341 12368 ins_encode( MovI2X_reg(dst, src) );
12342 12369 ins_pipe( pipe_slow );
12343 12370 %}
12344 12371
12345 12372 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12346 12373 match(Set dst (MoveD2L src));
12347 12374 effect(DEF dst, USE src);
12348 12375
12349 12376 ins_cost(250);
12350 12377 format %{ "MOV $dst.lo,$src\n\t"
12351 12378 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12352 12379 opcode(0x8B, 0x8B);
12353 12380 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12354 12381 ins_pipe( ialu_mem_long_reg );
12355 12382 %}
12356 12383
12357 12384 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12358 12385 predicate(UseSSE<=1);
12359 12386 match(Set dst (MoveD2L src));
12360 12387 effect(DEF dst, USE src);
12361 12388
12362 12389 ins_cost(125);
12363 12390 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12364 12391 ins_encode( Pop_Mem_Reg_D(dst, src) );
12365 12392 ins_pipe( fpu_mem_reg );
12366 12393 %}
12367 12394
12368 12395 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12369 12396 predicate(UseSSE>=2);
12370 12397 match(Set dst (MoveD2L src));
12371 12398 effect(DEF dst, USE src);
12372 12399 ins_cost(95);
12373 12400
12374 12401 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12375 12402 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12376 12403 ins_pipe( pipe_slow );
12377 12404 %}
12378 12405
12379 12406 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12380 12407 predicate(UseSSE>=2);
12381 12408 match(Set dst (MoveD2L src));
12382 12409 effect(DEF dst, USE src, TEMP tmp);
12383 12410 ins_cost(85);
12384 12411 format %{ "MOVD $dst.lo,$src\n\t"
12385 12412 "PSHUFLW $tmp,$src,0x4E\n\t"
12386 12413 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12387 12414 ins_encode( MovXD2L_reg(dst, src, tmp) );
12388 12415 ins_pipe( pipe_slow );
12389 12416 %}
12390 12417
12391 12418 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12392 12419 match(Set dst (MoveL2D src));
12393 12420 effect(DEF dst, USE src);
12394 12421
12395 12422 ins_cost(200);
12396 12423 format %{ "MOV $dst,$src.lo\n\t"
12397 12424 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12398 12425 opcode(0x89, 0x89);
12399 12426 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12400 12427 ins_pipe( ialu_mem_long_reg );
12401 12428 %}
12402 12429
12403 12430
12404 12431 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12405 12432 predicate(UseSSE<=1);
12406 12433 match(Set dst (MoveL2D src));
12407 12434 effect(DEF dst, USE src);
12408 12435 ins_cost(125);
12409 12436
12410 12437 format %{ "FLD_D $src\n\t"
12411 12438 "FSTP $dst\t# MoveL2D_stack_reg" %}
12412 12439 opcode(0xDD); /* DD /0, FLD m64real */
12413 12440 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12414 12441 Pop_Reg_D(dst) );
12415 12442 ins_pipe( fpu_reg_mem );
12416 12443 %}
12417 12444
12418 12445
12419 12446 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12420 12447 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12421 12448 match(Set dst (MoveL2D src));
12422 12449 effect(DEF dst, USE src);
12423 12450
12424 12451 ins_cost(95);
12425 12452 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12426 12453 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12427 12454 ins_pipe( pipe_slow );
12428 12455 %}
12429 12456
12430 12457 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12431 12458 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12432 12459 match(Set dst (MoveL2D src));
12433 12460 effect(DEF dst, USE src);
12434 12461
12435 12462 ins_cost(95);
12436 12463 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12437 12464 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12438 12465 ins_pipe( pipe_slow );
12439 12466 %}
12440 12467
12441 12468 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12442 12469 predicate(UseSSE>=2);
12443 12470 match(Set dst (MoveL2D src));
12444 12471 effect(TEMP dst, USE src, TEMP tmp);
12445 12472 ins_cost(85);
12446 12473 format %{ "MOVD $dst,$src.lo\n\t"
12447 12474 "MOVD $tmp,$src.hi\n\t"
12448 12475 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12449 12476 ins_encode( MovL2XD_reg(dst, src, tmp) );
12450 12477 ins_pipe( pipe_slow );
12451 12478 %}
12452 12479
12453 12480 // Replicate scalar to packed byte (1 byte) values in xmm
12454 12481 instruct Repl8B_reg(regXD dst, regXD src) %{
12455 12482 predicate(UseSSE>=2);
12456 12483 match(Set dst (Replicate8B src));
12457 12484 format %{ "MOVDQA $dst,$src\n\t"
12458 12485 "PUNPCKLBW $dst,$dst\n\t"
12459 12486 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12460 12487 ins_encode( pshufd_8x8(dst, src));
12461 12488 ins_pipe( pipe_slow );
12462 12489 %}
12463 12490
12464 12491 // Replicate scalar to packed byte (1 byte) values in xmm
12465 12492 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12466 12493 predicate(UseSSE>=2);
12467 12494 match(Set dst (Replicate8B src));
12468 12495 format %{ "MOVD $dst,$src\n\t"
12469 12496 "PUNPCKLBW $dst,$dst\n\t"
12470 12497 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12471 12498 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12472 12499 ins_pipe( pipe_slow );
12473 12500 %}
12474 12501
12475 12502 // Replicate scalar zero to packed byte (1 byte) values in xmm
12476 12503 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12477 12504 predicate(UseSSE>=2);
12478 12505 match(Set dst (Replicate8B zero));
12479 12506 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12480 12507 ins_encode( pxor(dst, dst));
12481 12508 ins_pipe( fpu_reg_reg );
12482 12509 %}
12483 12510
12484 12511  // Replicate scalar to packed short (2 byte) values in xmm
12485 12512 instruct Repl4S_reg(regXD dst, regXD src) %{
12486 12513 predicate(UseSSE>=2);
12487 12514 match(Set dst (Replicate4S src));
12488 12515 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12489 12516 ins_encode( pshufd_4x16(dst, src));
12490 12517 ins_pipe( fpu_reg_reg );
12491 12518 %}
12492 12519
12493 12520  // Replicate scalar to packed short (2 byte) values in xmm
12494 12521 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12495 12522 predicate(UseSSE>=2);
12496 12523 match(Set dst (Replicate4S src));
12497 12524 format %{ "MOVD $dst,$src\n\t"
12498 12525 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12499 12526 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12500 12527 ins_pipe( fpu_reg_reg );
12501 12528 %}
12502 12529
12503 12530 // Replicate scalar zero to packed short (2 byte) values in xmm
12504 12531 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12505 12532 predicate(UseSSE>=2);
12506 12533 match(Set dst (Replicate4S zero));
12507 12534 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12508 12535 ins_encode( pxor(dst, dst));
12509 12536 ins_pipe( fpu_reg_reg );
12510 12537 %}
12511 12538
12512 12539 // Replicate scalar to packed char (2 byte) values in xmm
12513 12540 instruct Repl4C_reg(regXD dst, regXD src) %{
12514 12541 predicate(UseSSE>=2);
12515 12542 match(Set dst (Replicate4C src));
12516 12543 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12517 12544 ins_encode( pshufd_4x16(dst, src));
12518 12545 ins_pipe( fpu_reg_reg );
12519 12546 %}
12520 12547
12521 12548 // Replicate scalar to packed char (2 byte) values in xmm
12522 12549 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12523 12550 predicate(UseSSE>=2);
12524 12551 match(Set dst (Replicate4C src));
12525 12552 format %{ "MOVD $dst,$src\n\t"
12526 12553 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12527 12554 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12528 12555 ins_pipe( fpu_reg_reg );
12529 12556 %}
12530 12557
12531 12558 // Replicate scalar zero to packed char (2 byte) values in xmm
12532 12559 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12533 12560 predicate(UseSSE>=2);
12534 12561 match(Set dst (Replicate4C zero));
12535 12562 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12536 12563 ins_encode( pxor(dst, dst));
12537 12564 ins_pipe( fpu_reg_reg );
12538 12565 %}
12539 12566
12540 12567 // Replicate scalar to packed integer (4 byte) values in xmm
12541 12568 instruct Repl2I_reg(regXD dst, regXD src) %{
12542 12569 predicate(UseSSE>=2);
12543 12570 match(Set dst (Replicate2I src));
12544 12571 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12545 12572 ins_encode( pshufd(dst, src, 0x00));
12546 12573 ins_pipe( fpu_reg_reg );
12547 12574 %}
12548 12575
12549 12576 // Replicate scalar to packed integer (4 byte) values in xmm
12550 12577 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12551 12578 predicate(UseSSE>=2);
12552 12579 match(Set dst (Replicate2I src));
12553 12580 format %{ "MOVD $dst,$src\n\t"
12554 12581 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12555 12582 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12556 12583 ins_pipe( fpu_reg_reg );
12557 12584 %}
12558 12585
12559 12586  // Replicate scalar zero to packed integer (4 byte) values in xmm
12560 12587 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12561 12588 predicate(UseSSE>=2);
12562 12589 match(Set dst (Replicate2I zero));
12563 12590 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12564 12591 ins_encode( pxor(dst, dst));
12565 12592 ins_pipe( fpu_reg_reg );
12566 12593 %}
12567 12594
12568 12595 // Replicate scalar to packed single precision floating point values in xmm
12569 12596 instruct Repl2F_reg(regXD dst, regXD src) %{
12570 12597 predicate(UseSSE>=2);
12571 12598 match(Set dst (Replicate2F src));
12572 12599 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12573 12600 ins_encode( pshufd(dst, src, 0xe0));
12574 12601 ins_pipe( fpu_reg_reg );
12575 12602 %}
12576 12603
12577 12604 // Replicate scalar to packed single precision floating point values in xmm
12578 12605 instruct Repl2F_regX(regXD dst, regX src) %{
12579 12606 predicate(UseSSE>=2);
12580 12607 match(Set dst (Replicate2F src));
12581 12608 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12582 12609 ins_encode( pshufd(dst, src, 0xe0));
12583 12610 ins_pipe( fpu_reg_reg );
12584 12611 %}
12585 12612
12586 12613 // Replicate scalar to packed single precision floating point values in xmm
12587 12614 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12588 12615 predicate(UseSSE>=2);
12589 12616 match(Set dst (Replicate2F zero));
12590 12617 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12591 12618 ins_encode( pxor(dst, dst));
12592 12619 ins_pipe( fpu_reg_reg );
12593 12620 %}
12594 12621
12595 12622 // =======================================================================
12596 12623 // fast clearing of an array
12597 12624 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12598 12625 match(Set dummy (ClearArray cnt base));
12599 12626 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12600 12627 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12601 12628 "XOR EAX,EAX\n\t"
12602 12629 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12603 12630 opcode(0,0x4);
12604 12631 ins_encode( Opcode(0xD1), RegOpc(ECX),
12605 12632 OpcRegReg(0x33,EAX,EAX),
12606 12633 Opcode(0xF3), Opcode(0xAB) );
12607 12634 ins_pipe( pipe_slow );
12608 12635 %}
12609 12636
12610 12637 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12611 12638 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12612 12639 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12613 12640 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12614 12641
12615 12642 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12616 12643 ins_encode %{
12617 12644 __ string_compare($str1$$Register, $str2$$Register,
12618 12645 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12619 12646 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12620 12647 %}
12621 12648 ins_pipe( pipe_slow );
12622 12649 %}
12623 12650
12624 12651 // fast string equals
12625 12652 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12626 12653 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12627 12654 match(Set result (StrEquals (Binary str1 str2) cnt));
12628 12655 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12629 12656
12630 12657 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12631 12658 ins_encode %{
12632 12659 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12633 12660 $cnt$$Register, $result$$Register, $tmp3$$Register,
12634 12661 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12635 12662 %}
12636 12663 ins_pipe( pipe_slow );
12637 12664 %}
12638 12665
12639 12666 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12640 12667 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12641 12668 predicate(UseSSE42Intrinsics);
12642 12669 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12643 12670 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12644 12671
12645 12672 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12646 12673 ins_encode %{
12647 12674 __ string_indexof($str1$$Register, $str2$$Register,
12648 12675 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12649 12676 $tmp1$$XMMRegister, $tmp2$$Register);
12650 12677 %}
12651 12678 ins_pipe( pipe_slow );
12652 12679 %}
12653 12680
12654 12681 // fast array equals
12655 12682 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12656 12683 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12657 12684 %{
12658 12685 match(Set result (AryEq ary1 ary2));
12659 12686 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12660 12687 //ins_cost(300);
12661 12688
12662 12689 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12663 12690 ins_encode %{
12664 12691 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12665 12692 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12666 12693 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12667 12694 %}
12668 12695 ins_pipe( pipe_slow );
12669 12696 %}
12670 12697
12671 12698 //----------Control Flow Instructions------------------------------------------
12672 12699 // Signed compare Instructions
12673 12700 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12674 12701 match(Set cr (CmpI op1 op2));
12675 12702 effect( DEF cr, USE op1, USE op2 );
12676 12703 format %{ "CMP $op1,$op2" %}
12677 12704 opcode(0x3B); /* Opcode 3B /r */
12678 12705 ins_encode( OpcP, RegReg( op1, op2) );
12679 12706 ins_pipe( ialu_cr_reg_reg );
12680 12707 %}
12681 12708
12682 12709 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12683 12710 match(Set cr (CmpI op1 op2));
12684 12711 effect( DEF cr, USE op1 );
12685 12712 format %{ "CMP $op1,$op2" %}
12686 12713 opcode(0x81,0x07); /* Opcode 81 /7 */
12687 12714 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12688 12715 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12689 12716 ins_pipe( ialu_cr_reg_imm );
12690 12717 %}
12691 12718
12692 12719 // Cisc-spilled version of cmpI_eReg
12693 12720 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12694 12721 match(Set cr (CmpI op1 (LoadI op2)));
12695 12722
12696 12723 format %{ "CMP $op1,$op2" %}
12697 12724 ins_cost(500);
12698 12725 opcode(0x3B); /* Opcode 3B /r */
12699 12726 ins_encode( OpcP, RegMem( op1, op2) );
12700 12727 ins_pipe( ialu_cr_reg_mem );
12701 12728 %}
12702 12729
12703 12730 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12704 12731 match(Set cr (CmpI src zero));
12705 12732 effect( DEF cr, USE src );
12706 12733
12707 12734 format %{ "TEST $src,$src" %}
12708 12735 opcode(0x85);
12709 12736 ins_encode( OpcP, RegReg( src, src ) );
12710 12737 ins_pipe( ialu_cr_reg_imm );
12711 12738 %}
12712 12739
12713 12740 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12714 12741 match(Set cr (CmpI (AndI src con) zero));
12715 12742
12716 12743 format %{ "TEST $src,$con" %}
12717 12744 opcode(0xF7,0x00);
12718 12745 ins_encode( OpcP, RegOpc(src), Con32(con) );
12719 12746 ins_pipe( ialu_cr_reg_imm );
12720 12747 %}
12721 12748
12722 12749 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12723 12750 match(Set cr (CmpI (AndI src mem) zero));
12724 12751
12725 12752 format %{ "TEST $src,$mem" %}
12726 12753 opcode(0x85);
12727 12754 ins_encode( OpcP, RegMem( src, mem ) );
12728 12755 ins_pipe( ialu_cr_reg_mem );
12729 12756 %}
12730 12757
12731 12758 // Unsigned compare Instructions; really, same as signed except they
12732 12759 // produce an eFlagsRegU instead of eFlagsReg.
12733 12760 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12734 12761 match(Set cr (CmpU op1 op2));
12735 12762
12736 12763 format %{ "CMPu $op1,$op2" %}
12737 12764 opcode(0x3B); /* Opcode 3B /r */
12738 12765 ins_encode( OpcP, RegReg( op1, op2) );
12739 12766 ins_pipe( ialu_cr_reg_reg );
12740 12767 %}
12741 12768
12742 12769 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12743 12770 match(Set cr (CmpU op1 op2));
12744 12771
12745 12772 format %{ "CMPu $op1,$op2" %}
12746 12773 opcode(0x81,0x07); /* Opcode 81 /7 */
12747 12774 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12748 12775 ins_pipe( ialu_cr_reg_imm );
12749 12776 %}
12750 12777
12751 12778 // // Cisc-spilled version of cmpU_eReg
12752 12779 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12753 12780 match(Set cr (CmpU op1 (LoadI op2)));
12754 12781
12755 12782 format %{ "CMPu $op1,$op2" %}
12756 12783 ins_cost(500);
12757 12784 opcode(0x3B); /* Opcode 3B /r */
12758 12785 ins_encode( OpcP, RegMem( op1, op2) );
12759 12786 ins_pipe( ialu_cr_reg_mem );
12760 12787 %}
12761 12788
12762 12789 // // Cisc-spilled version of cmpU_eReg
12763 12790 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12764 12791 // match(Set cr (CmpU (LoadI op1) op2));
12765 12792 //
12766 12793 // format %{ "CMPu $op1,$op2" %}
12767 12794 // ins_cost(500);
12768 12795 // opcode(0x39); /* Opcode 39 /r */
12769 12796 // ins_encode( OpcP, RegMem( op1, op2) );
12770 12797 //%}
12771 12798
12772 12799 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12773 12800 match(Set cr (CmpU src zero));
12774 12801
12775 12802 format %{ "TESTu $src,$src" %}
12776 12803 opcode(0x85);
12777 12804 ins_encode( OpcP, RegReg( src, src ) );
12778 12805 ins_pipe( ialu_cr_reg_imm );
12779 12806 %}
12780 12807
12781 12808 // Unsigned pointer compare Instructions
12782 12809 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12783 12810 match(Set cr (CmpP op1 op2));
12784 12811
12785 12812 format %{ "CMPu $op1,$op2" %}
12786 12813 opcode(0x3B); /* Opcode 3B /r */
12787 12814 ins_encode( OpcP, RegReg( op1, op2) );
12788 12815 ins_pipe( ialu_cr_reg_reg );
12789 12816 %}
12790 12817
12791 12818 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12792 12819 match(Set cr (CmpP op1 op2));
12793 12820
12794 12821 format %{ "CMPu $op1,$op2" %}
12795 12822 opcode(0x81,0x07); /* Opcode 81 /7 */
12796 12823 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12797 12824 ins_pipe( ialu_cr_reg_imm );
12798 12825 %}
12799 12826
12800 12827 // // Cisc-spilled version of cmpP_eReg
12801 12828 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12802 12829 match(Set cr (CmpP op1 (LoadP op2)));
12803 12830
12804 12831 format %{ "CMPu $op1,$op2" %}
12805 12832 ins_cost(500);
12806 12833 opcode(0x3B); /* Opcode 3B /r */
12807 12834 ins_encode( OpcP, RegMem( op1, op2) );
12808 12835 ins_pipe( ialu_cr_reg_mem );
12809 12836 %}
12810 12837
12811 12838 // // Cisc-spilled version of cmpP_eReg
12812 12839 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12813 12840 // match(Set cr (CmpP (LoadP op1) op2));
12814 12841 //
12815 12842 // format %{ "CMPu $op1,$op2" %}
12816 12843 // ins_cost(500);
12817 12844 // opcode(0x39); /* Opcode 39 /r */
12818 12845 // ins_encode( OpcP, RegMem( op1, op2) );
12819 12846 //%}
12820 12847
12821 12848 // Compare raw pointer (used in out-of-heap check).
12822 12849 // Only works because non-oop pointers must be raw pointers
12823 12850 // and raw pointers have no anti-dependencies.
12824 12851 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12825 12852 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12826 12853 match(Set cr (CmpP op1 (LoadP op2)));
12827 12854
12828 12855 format %{ "CMPu $op1,$op2" %}
12829 12856 opcode(0x3B); /* Opcode 3B /r */
12830 12857 ins_encode( OpcP, RegMem( op1, op2) );
12831 12858 ins_pipe( ialu_cr_reg_mem );
12832 12859 %}
12833 12860
12834 12861 //
12835 12862 // This will generate a signed flags result. This should be ok
12836 12863 // since any compare to a zero should be eq/neq.
12837 12864 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12838 12865 match(Set cr (CmpP src zero));
12839 12866
12840 12867 format %{ "TEST $src,$src" %}
12841 12868 opcode(0x85);
12842 12869 ins_encode( OpcP, RegReg( src, src ) );
12843 12870 ins_pipe( ialu_cr_reg_imm );
12844 12871 %}
12845 12872
12846 12873 // Cisc-spilled version of testP_reg
12847 12874 // This will generate a signed flags result. This should be ok
12848 12875 // since any compare to a zero should be eq/neq.
12849 12876 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12850 12877 match(Set cr (CmpP (LoadP op) zero));
12851 12878
12852 12879 format %{ "TEST $op,0xFFFFFFFF" %}
12853 12880 ins_cost(500);
12854 12881 opcode(0xF7); /* Opcode F7 /0 */
12855 12882 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12856 12883 ins_pipe( ialu_cr_reg_imm );
12857 12884 %}
12858 12885
12859 12886 // Yanked all unsigned pointer compare operations.
12860 12887 // Pointer compares are done with CmpP which is already unsigned.
12861 12888
12862 12889 //----------Max and Min--------------------------------------------------------
12863 12890 // Min Instructions
12864 12891 ////
12865 12892 // *** Min and Max using the conditional move are slower than the
12866 12893 // *** branch version on a Pentium III.
12867 12894 // // Conditional move for min
12868 12895 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12869 12896 // effect( USE_DEF op2, USE op1, USE cr );
12870 12897 // format %{ "CMOVlt $op2,$op1\t! min" %}
12871 12898 // opcode(0x4C,0x0F);
12872 12899 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12873 12900 // ins_pipe( pipe_cmov_reg );
12874 12901 //%}
12875 12902 //
12876 12903 //// Min Register with Register (P6 version)
12877 12904 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12878 12905 // predicate(VM_Version::supports_cmov() );
12879 12906 // match(Set op2 (MinI op1 op2));
12880 12907 // ins_cost(200);
12881 12908 // expand %{
12882 12909 // eFlagsReg cr;
12883 12910 // compI_eReg(cr,op1,op2);
12884 12911 // cmovI_reg_lt(op2,op1,cr);
12885 12912 // %}
12886 12913 //%}
12887 12914
12888 12915 // Min Register with Register (generic version)
12889 12916 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12890 12917 match(Set dst (MinI dst src));
12891 12918 effect(KILL flags);
12892 12919 ins_cost(300);
12893 12920
12894 12921 format %{ "MIN $dst,$src" %}
12895 12922 opcode(0xCC);
12896 12923 ins_encode( min_enc(dst,src) );
12897 12924 ins_pipe( pipe_slow );
12898 12925 %}
12899 12926
12900 12927 // Max Register with Register
12901 12928 // *** Min and Max using the conditional move are slower than the
12902 12929 // *** branch version on a Pentium III.
12903 12930 // // Conditional move for max
12904 12931 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12905 12932 // effect( USE_DEF op2, USE op1, USE cr );
12906 12933 // format %{ "CMOVgt $op2,$op1\t! max" %}
12907 12934 // opcode(0x4F,0x0F);
12908 12935 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12909 12936 // ins_pipe( pipe_cmov_reg );
12910 12937 //%}
12911 12938 //
12912 12939 // // Max Register with Register (P6 version)
12913 12940 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12914 12941 // predicate(VM_Version::supports_cmov() );
12915 12942 // match(Set op2 (MaxI op1 op2));
12916 12943 // ins_cost(200);
12917 12944 // expand %{
12918 12945 // eFlagsReg cr;
12919 12946 // compI_eReg(cr,op1,op2);
12920 12947 // cmovI_reg_gt(op2,op1,cr);
12921 12948 // %}
12922 12949 //%}
12923 12950
12924 12951 // Max Register with Register (generic version)
12925 12952 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12926 12953 match(Set dst (MaxI dst src));
12927 12954 effect(KILL flags);
12928 12955 ins_cost(300);
12929 12956
12930 12957 format %{ "MAX $dst,$src" %}
12931 12958 opcode(0xCC);
↓ open down ↓ |
1337 lines elided |
↑ open up ↑ |
12932 12959 ins_encode( max_enc(dst,src) );
12933 12960 ins_pipe( pipe_slow );
12934 12961 %}
12935 12962
12936 12963 // ============================================================================
12937 12964 // Branch Instructions
12938 12965 // Jump Table
12939 12966 instruct jumpXtnd(eRegI switch_val) %{
12940 12967 match(Jump switch_val);
12941 12968 ins_cost(350);
12942 -
12943 - format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12944 -
12969 + format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12945 12970 ins_encode %{
12946 - address table_base = __ address_table_constant(_index2label);
12947 -
12948 12971 // Jump to Address(table_base + switch_reg)
12949 - InternalAddress table(table_base);
12950 12972 Address index(noreg, $switch_val$$Register, Address::times_1);
12951 - __ jump(ArrayAddress(table, index));
12973 + __ jump(ArrayAddress($constantaddress, index));
12952 12974 %}
12953 12975 ins_pc_relative(1);
12954 12976 ins_pipe(pipe_jmp);
12955 12977 %}
12956 12978
12957 12979 // Jump Direct - Label defines a relative address from JMP+1
12958 12980 instruct jmpDir(label labl) %{
12959 12981 match(Goto);
12960 12982 effect(USE labl);
12961 12983
12962 12984 ins_cost(300);
12963 12985 format %{ "JMP $labl" %}
12964 12986 size(5);
12965 12987 opcode(0xE9);
12966 12988 ins_encode( OpcP, Lbl( labl ) );
12967 12989 ins_pipe( pipe_jmp );
12968 12990 ins_pc_relative(1);
12969 12991 %}
12970 12992
12971 12993 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12972 12994 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12973 12995 match(If cop cr);
12974 12996 effect(USE labl);
12975 12997
12976 12998 ins_cost(300);
12977 12999 format %{ "J$cop $labl" %}
12978 13000 size(6);
12979 13001 opcode(0x0F, 0x80);
12980 13002 ins_encode( Jcc( cop, labl) );
12981 13003 ins_pipe( pipe_jcc );
12982 13004 ins_pc_relative(1);
12983 13005 %}
12984 13006
12985 13007 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12986 13008 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12987 13009 match(CountedLoopEnd cop cr);
12988 13010 effect(USE labl);
12989 13011
12990 13012 ins_cost(300);
12991 13013 format %{ "J$cop $labl\t# Loop end" %}
12992 13014 size(6);
12993 13015 opcode(0x0F, 0x80);
12994 13016 ins_encode( Jcc( cop, labl) );
12995 13017 ins_pipe( pipe_jcc );
12996 13018 ins_pc_relative(1);
12997 13019 %}
12998 13020
12999 13021 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13000 13022 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13001 13023 match(CountedLoopEnd cop cmp);
13002 13024 effect(USE labl);
13003 13025
13004 13026 ins_cost(300);
13005 13027 format %{ "J$cop,u $labl\t# Loop end" %}
13006 13028 size(6);
13007 13029 opcode(0x0F, 0x80);
13008 13030 ins_encode( Jcc( cop, labl) );
13009 13031 ins_pipe( pipe_jcc );
13010 13032 ins_pc_relative(1);
13011 13033 %}
13012 13034
13013 13035 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13014 13036 match(CountedLoopEnd cop cmp);
13015 13037 effect(USE labl);
13016 13038
13017 13039 ins_cost(200);
13018 13040 format %{ "J$cop,u $labl\t# Loop end" %}
13019 13041 size(6);
13020 13042 opcode(0x0F, 0x80);
13021 13043 ins_encode( Jcc( cop, labl) );
13022 13044 ins_pipe( pipe_jcc );
13023 13045 ins_pc_relative(1);
13024 13046 %}
13025 13047
13026 13048 // Jump Direct Conditional - using unsigned comparison
13027 13049 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13028 13050 match(If cop cmp);
13029 13051 effect(USE labl);
13030 13052
13031 13053 ins_cost(300);
13032 13054 format %{ "J$cop,u $labl" %}
13033 13055 size(6);
13034 13056 opcode(0x0F, 0x80);
13035 13057 ins_encode(Jcc(cop, labl));
13036 13058 ins_pipe(pipe_jcc);
13037 13059 ins_pc_relative(1);
13038 13060 %}
13039 13061
13040 13062 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13041 13063 match(If cop cmp);
13042 13064 effect(USE labl);
13043 13065
13044 13066 ins_cost(200);
13045 13067 format %{ "J$cop,u $labl" %}
13046 13068 size(6);
13047 13069 opcode(0x0F, 0x80);
13048 13070 ins_encode(Jcc(cop, labl));
13049 13071 ins_pipe(pipe_jcc);
13050 13072 ins_pc_relative(1);
13051 13073 %}
13052 13074
13053 13075 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13054 13076 match(If cop cmp);
13055 13077 effect(USE labl);
13056 13078
13057 13079 ins_cost(200);
13058 13080 format %{ $$template
13059 13081 if ($cop$$cmpcode == Assembler::notEqual) {
13060 13082 $$emit$$"JP,u $labl\n\t"
13061 13083 $$emit$$"J$cop,u $labl"
13062 13084 } else {
13063 13085 $$emit$$"JP,u done\n\t"
13064 13086 $$emit$$"J$cop,u $labl\n\t"
13065 13087 $$emit$$"done:"
13066 13088 }
13067 13089 %}
13068 13090 size(12);
13069 13091 opcode(0x0F, 0x80);
13070 13092 ins_encode %{
13071 13093 Label* l = $labl$$label;
13072 13094 $$$emit8$primary;
13073 13095 emit_cc(cbuf, $secondary, Assembler::parity);
13074 13096 int parity_disp = -1;
13075 13097 bool ok = false;
13076 13098 if ($cop$$cmpcode == Assembler::notEqual) {
13077 13099 // the two jumps are emitted 6 bytes apart, so their displacements to the common label differ by 6
13078 13100 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13079 13101 } else if ($cop$$cmpcode == Assembler::equal) {
13080 13102 parity_disp = 6;
13081 13103 ok = true;
13082 13104 } else {
13083 13105 ShouldNotReachHere();
13084 13106 }
13085 13107 emit_d32(cbuf, parity_disp);
13086 13108 $$$emit8$primary;
13087 13109 emit_cc(cbuf, $secondary, $cop$$cmpcode);
13088 13110 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13089 13111 emit_d32(cbuf, disp);
13090 13112 %}
13091 13113 ins_pipe(pipe_jcc);
13092 13114 ins_pc_relative(1);
13093 13115 %}
13094 13116
13095 13117 // ============================================================================
13096 13118 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
13097 13119 // array for an instance of the superklass. Set a hidden internal cache on a
13098 13120 // hit (cache is checked with exposed code in gen_subtype_check()). Return
13099 13121 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
13100 13122 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13101 13123 match(Set result (PartialSubtypeCheck sub super));
13102 13124 effect( KILL rcx, KILL cr );
13103 13125
13104 13126 ins_cost(1100); // slightly larger than the next version
13105 13127 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13106 13128 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13107 13129 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13108 13130 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13109 13131 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
13110 13132 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13111 13133 "XOR $result,$result\t\t Hit: EDI zero\n\t"
13112 13134 "miss:\t" %}
13113 13135
13114 13136 opcode(0x1); // Force a XOR of EDI
13115 13137 ins_encode( enc_PartialSubtypeCheck() );
13116 13138 ins_pipe( pipe_slow );
13117 13139 %}
13118 13140
13119 13141 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13120 13142 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13121 13143 effect( KILL rcx, KILL result );
13122 13144
13123 13145 ins_cost(1000);
13124 13146 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13125 13147 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13126 13148 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13127 13149 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13128 13150 "JNE,s miss\t\t# Missed: flags NZ\n\t"
13129 13151 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13130 13152 "miss:\t" %}
13131 13153
13132 13154 opcode(0x0); // No need to XOR EDI
13133 13155 ins_encode( enc_PartialSubtypeCheck() );
13134 13156 ins_pipe( pipe_slow );
13135 13157 %}
13136 13158
13137 13159 // ============================================================================
13138 13160 // Branch Instructions -- short offset versions
13139 13161 //
13140 13162 // These instructions are used to replace jumps of a long offset (the default
13141 13163 // match) with jumps of a shorter offset. These instructions are all tagged
13142 13164 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13143 13165 // match rules in general matching. Instead, the ADLC generates a conversion
13144 13166 // method in the MachNode which can be used to do in-place replacement of the
13145 13167 // long variant with the shorter variant. The compiler will determine if a
13146 13168 // branch can be taken by the is_short_branch_offset() predicate in the machine
13147 13169 // specific code section of the file.
13148 13170
13149 13171 // Jump Direct - Label defines a relative address from JMP+1
13150 13172 instruct jmpDir_short(label labl) %{
13151 13173 match(Goto);
13152 13174 effect(USE labl);
13153 13175
13154 13176 ins_cost(300);
13155 13177 format %{ "JMP,s $labl" %}
13156 13178 size(2);
13157 13179 opcode(0xEB);
13158 13180 ins_encode( OpcP, LblShort( labl ) );
13159 13181 ins_pipe( pipe_jmp );
13160 13182 ins_pc_relative(1);
13161 13183 ins_short_branch(1);
13162 13184 %}
13163 13185
13164 13186 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13165 13187 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13166 13188 match(If cop cr);
13167 13189 effect(USE labl);
13168 13190
13169 13191 ins_cost(300);
13170 13192 format %{ "J$cop,s $labl" %}
13171 13193 size(2);
13172 13194 opcode(0x70);
13173 13195 ins_encode( JccShort( cop, labl) );
13174 13196 ins_pipe( pipe_jcc );
13175 13197 ins_pc_relative(1);
13176 13198 ins_short_branch(1);
13177 13199 %}
13178 13200
13179 13201 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13180 13202 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13181 13203 match(CountedLoopEnd cop cr);
13182 13204 effect(USE labl);
13183 13205
13184 13206 ins_cost(300);
13185 13207 format %{ "J$cop,s $labl\t# Loop end" %}
13186 13208 size(2);
13187 13209 opcode(0x70);
13188 13210 ins_encode( JccShort( cop, labl) );
13189 13211 ins_pipe( pipe_jcc );
13190 13212 ins_pc_relative(1);
13191 13213 ins_short_branch(1);
13192 13214 %}
13193 13215
13194 13216 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13195 13217 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13196 13218 match(CountedLoopEnd cop cmp);
13197 13219 effect(USE labl);
13198 13220
13199 13221 ins_cost(300);
13200 13222 format %{ "J$cop,us $labl\t# Loop end" %}
13201 13223 size(2);
13202 13224 opcode(0x70);
13203 13225 ins_encode( JccShort( cop, labl) );
13204 13226 ins_pipe( pipe_jcc );
13205 13227 ins_pc_relative(1);
13206 13228 ins_short_branch(1);
13207 13229 %}
13208 13230
13209 13231 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13210 13232 match(CountedLoopEnd cop cmp);
13211 13233 effect(USE labl);
13212 13234
13213 13235 ins_cost(300);
13214 13236 format %{ "J$cop,us $labl\t# Loop end" %}
13215 13237 size(2);
13216 13238 opcode(0x70);
13217 13239 ins_encode( JccShort( cop, labl) );
13218 13240 ins_pipe( pipe_jcc );
13219 13241 ins_pc_relative(1);
13220 13242 ins_short_branch(1);
13221 13243 %}
13222 13244
13223 13245 // Jump Direct Conditional - using unsigned comparison
13224 13246 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13225 13247 match(If cop cmp);
13226 13248 effect(USE labl);
13227 13249
13228 13250 ins_cost(300);
13229 13251 format %{ "J$cop,us $labl" %}
13230 13252 size(2);
13231 13253 opcode(0x70);
13232 13254 ins_encode( JccShort( cop, labl) );
13233 13255 ins_pipe( pipe_jcc );
13234 13256 ins_pc_relative(1);
13235 13257 ins_short_branch(1);
13236 13258 %}
13237 13259
13238 13260 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13239 13261 match(If cop cmp);
13240 13262 effect(USE labl);
13241 13263
13242 13264 ins_cost(300);
13243 13265 format %{ "J$cop,us $labl" %}
13244 13266 size(2);
13245 13267 opcode(0x70);
13246 13268 ins_encode( JccShort( cop, labl) );
13247 13269 ins_pipe( pipe_jcc );
13248 13270 ins_pc_relative(1);
13249 13271 ins_short_branch(1);
13250 13272 %}
13251 13273
13252 13274 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13253 13275 match(If cop cmp);
13254 13276 effect(USE labl);
13255 13277
13256 13278 ins_cost(300);
13257 13279 format %{ $$template
13258 13280 if ($cop$$cmpcode == Assembler::notEqual) {
13259 13281 $$emit$$"JP,u,s $labl\n\t"
13260 13282 $$emit$$"J$cop,u,s $labl"
13261 13283 } else {
13262 13284 $$emit$$"JP,u,s done\n\t"
13263 13285 $$emit$$"J$cop,u,s $labl\n\t"
13264 13286 $$emit$$"done:"
13265 13287 }
13266 13288 %}
13267 13289 size(4);
13268 13290 opcode(0x70);
13269 13291 ins_encode %{
13270 13292 Label* l = $labl$$label;
13271 13293 emit_cc(cbuf, $primary, Assembler::parity);
13272 13294 int parity_disp = -1;
13273 13295 if ($cop$$cmpcode == Assembler::notEqual) {
13274 13296 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13275 13297 } else if ($cop$$cmpcode == Assembler::equal) {
13276 13298 parity_disp = 2;
13277 13299 } else {
13278 13300 ShouldNotReachHere();
13279 13301 }
13280 13302 emit_d8(cbuf, parity_disp);
13281 13303 emit_cc(cbuf, $primary, $cop$$cmpcode);
13282 13304 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13283 13305 emit_d8(cbuf, disp);
13284 13306 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
13285 13307 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
13286 13308 %}
13287 13309 ins_pipe(pipe_jcc);
13288 13310 ins_pc_relative(1);
13289 13311 ins_short_branch(1);
13290 13312 %}
13291 13313
13292 13314 // ============================================================================
13293 13315 // Long Compare
13294 13316 //
13295 13317 // Currently we hold longs in 2 registers. Comparing such values efficiently
13296 13318 // is tricky. The flavor of compare used depends on whether we are testing
13297 13319 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
13298 13320 // The GE test is the negated LT test. The LE test can be had by commuting
13299 13321 // the operands (yielding a GE test) and then negating; negate again for the
13300 13322 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
13301 13323 // NE test is negated from that.
13302 13324
13303 13325 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13304 13326 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
13305 13327 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
13306 13328 // are collapsed internally in the ADLC's dfa-gen code. The match for
13307 13329 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13308 13330 // foo match ends up with the wrong leaf. One fix is to not match both
13309 13331 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
13310 13332 // both forms beat the trinary form of long-compare and both are very useful
13311 13333 // on Intel which has so few registers.
13312 13334
13313 13335 // Manifest a CmpL result in an integer register. Very painful.
13314 13336 // This is the test to avoid.
13315 13337 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13316 13338 match(Set dst (CmpL3 src1 src2));
13317 13339 effect( KILL flags );
13318 13340 ins_cost(1000);
13319 13341 format %{ "XOR $dst,$dst\n\t"
13320 13342 "CMP $src1.hi,$src2.hi\n\t"
13321 13343 "JLT,s m_one\n\t"
13322 13344 "JGT,s p_one\n\t"
13323 13345 "CMP $src1.lo,$src2.lo\n\t"
13324 13346 "JB,s m_one\n\t"
13325 13347 "JEQ,s done\n"
13326 13348 "p_one:\tINC $dst\n\t"
13327 13349 "JMP,s done\n"
13328 13350 "m_one:\tDEC $dst\n"
13329 13351 "done:" %}
13330 13352 ins_encode %{
13331 13353 Label p_one, m_one, done;
13332 13354 __ xorptr($dst$$Register, $dst$$Register);
13333 13355 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13334 13356 __ jccb(Assembler::less, m_one);
13335 13357 __ jccb(Assembler::greater, p_one);
13336 13358 __ cmpl($src1$$Register, $src2$$Register);
13337 13359 __ jccb(Assembler::below, m_one);
13338 13360 __ jccb(Assembler::equal, done);
13339 13361 __ bind(p_one);
13340 13362 __ incrementl($dst$$Register);
13341 13363 __ jmpb(done);
13342 13364 __ bind(m_one);
13343 13365 __ decrementl($dst$$Register);
13344 13366 __ bind(done);
13345 13367 %}
13346 13368 ins_pipe( pipe_slow );
13347 13369 %}
13348 13370
13349 13371 //======
13350 13372 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13351 13373 // compares. Can be used for LE or GT compares by reversing arguments.
13352 13374 // NOT GOOD FOR EQ/NE tests.
13353 13375 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13354 13376 match( Set flags (CmpL src zero ));
13355 13377 ins_cost(100);
13356 13378 format %{ "TEST $src.hi,$src.hi" %}
13357 13379 opcode(0x85);
13358 13380 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13359 13381 ins_pipe( ialu_cr_reg_reg );
13360 13382 %}
13361 13383
13362 13384 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13363 13385 // compares. Can be used for LE or GT compares by reversing arguments.
13364 13386 // NOT GOOD FOR EQ/NE tests.
13365 13387 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13366 13388 match( Set flags (CmpL src1 src2 ));
13367 13389 effect( TEMP tmp );
13368 13390 ins_cost(300);
13369 13391 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13370 13392 "MOV $tmp,$src1.hi\n\t"
13371 13393 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13372 13394 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13373 13395 ins_pipe( ialu_cr_reg_reg );
13374 13396 %}
13375 13397
13376 13398 // Long compares reg < zero/req OR reg >= zero/req.
13377 13399 // Just a wrapper for a normal branch, plus the predicate test.
13378 13400 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13379 13401 match(If cmp flags);
13380 13402 effect(USE labl);
13381 13403 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13382 13404 expand %{
13383 13405 jmpCon(cmp,flags,labl); // JLT or JGE...
13384 13406 %}
13385 13407 %}
13386 13408
13387 13409 // Compare 2 longs and CMOVE longs.
13388 13410 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13389 13411 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13390 13412 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13391 13413 ins_cost(400);
13392 13414 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13393 13415 "CMOV$cmp $dst.hi,$src.hi" %}
13394 13416 opcode(0x0F,0x40);
13395 13417 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13396 13418 ins_pipe( pipe_cmov_reg_long );
13397 13419 %}
13398 13420
13399 13421 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13400 13422 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13401 13423 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13402 13424 ins_cost(500);
13403 13425 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13404 13426 "CMOV$cmp $dst.hi,$src.hi" %}
13405 13427 opcode(0x0F,0x40);
13406 13428 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13407 13429 ins_pipe( pipe_cmov_reg_long );
13408 13430 %}
13409 13431
13410 13432 // Compare 2 longs and CMOVE ints.
13411 13433 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13412 13434 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13413 13435 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13414 13436 ins_cost(200);
13415 13437 format %{ "CMOV$cmp $dst,$src" %}
13416 13438 opcode(0x0F,0x40);
13417 13439 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13418 13440 ins_pipe( pipe_cmov_reg );
13419 13441 %}
13420 13442
13421 13443 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13422 13444 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13423 13445 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13424 13446 ins_cost(250);
13425 13447 format %{ "CMOV$cmp $dst,$src" %}
13426 13448 opcode(0x0F,0x40);
13427 13449 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13428 13450 ins_pipe( pipe_cmov_mem );
13429 13451 %}
13430 13452
13431 13453 // Compare 2 longs and CMOVE ints.
13432 13454 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13433 13455 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13434 13456 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13435 13457 ins_cost(200);
13436 13458 format %{ "CMOV$cmp $dst,$src" %}
13437 13459 opcode(0x0F,0x40);
13438 13460 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13439 13461 ins_pipe( pipe_cmov_reg );
13440 13462 %}
13441 13463
13442 13464 // Compare 2 longs and CMOVE doubles
13443 13465 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13444 13466 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13445 13467 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13446 13468 ins_cost(200);
13447 13469 expand %{
13448 13470 fcmovD_regS(cmp,flags,dst,src);
13449 13471 %}
13450 13472 %}
13451 13473
13452 13474 // Compare 2 longs and CMOVE doubles
13453 13475 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
13454 13476 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13455 13477 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13456 13478 ins_cost(200);
13457 13479 expand %{
13458 13480 fcmovXD_regS(cmp,flags,dst,src);
13459 13481 %}
13460 13482 %}
13461 13483
13462 13484 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13463 13485 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13464 13486 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13465 13487 ins_cost(200);
13466 13488 expand %{
13467 13489 fcmovF_regS(cmp,flags,dst,src);
13468 13490 %}
13469 13491 %}
13470 13492
13471 13493 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
13472 13494 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13473 13495 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13474 13496 ins_cost(200);
13475 13497 expand %{
13476 13498 fcmovX_regS(cmp,flags,dst,src);
13477 13499 %}
13478 13500 %}
13479 13501
13480 13502 //======
13481 13503 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13482 13504 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13483 13505 match( Set flags (CmpL src zero ));
13484 13506 effect(TEMP tmp);
13485 13507 ins_cost(200);
13486 13508 format %{ "MOV $tmp,$src.lo\n\t"
13487 13509 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13488 13510 ins_encode( long_cmp_flags0( src, tmp ) );
13489 13511 ins_pipe( ialu_reg_reg_long );
13490 13512 %}
13491 13513
13492 13514 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13493 13515 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13494 13516 match( Set flags (CmpL src1 src2 ));
13495 13517 ins_cost(200+300);
13496 13518 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13497 13519 "JNE,s skip\n\t"
13498 13520 "CMP $src1.hi,$src2.hi\n\t"
13499 13521 "skip:\t" %}
13500 13522 ins_encode( long_cmp_flags1( src1, src2 ) );
13501 13523 ins_pipe( ialu_cr_reg_reg );
13502 13524 %}
13503 13525
13504 13526 // Long compare reg == zero/reg OR reg != zero/reg
13505 13527 // Just a wrapper for a normal branch, plus the predicate test.
13506 13528 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13507 13529 match(If cmp flags);
13508 13530 effect(USE labl);
13509 13531 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13510 13532 expand %{
13511 13533 jmpCon(cmp,flags,labl); // JEQ or JNE...
13512 13534 %}
13513 13535 %}
13514 13536
13515 13537 // Compare 2 longs and CMOVE longs.
13516 13538 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13517 13539 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13518 13540 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13519 13541 ins_cost(400);
13520 13542 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13521 13543 "CMOV$cmp $dst.hi,$src.hi" %}
13522 13544 opcode(0x0F,0x40);
13523 13545 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13524 13546 ins_pipe( pipe_cmov_reg_long );
13525 13547 %}
13526 13548
13527 13549 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13528 13550 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13529 13551 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13530 13552 ins_cost(500);
13531 13553 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13532 13554 "CMOV$cmp $dst.hi,$src.hi" %}
13533 13555 opcode(0x0F,0x40);
13534 13556 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13535 13557 ins_pipe( pipe_cmov_reg_long );
13536 13558 %}
13537 13559
13538 13560 // Compare 2 longs and CMOVE ints.
13539 13561 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13540 13562 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13541 13563 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13542 13564 ins_cost(200);
13543 13565 format %{ "CMOV$cmp $dst,$src" %}
13544 13566 opcode(0x0F,0x40);
13545 13567 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13546 13568 ins_pipe( pipe_cmov_reg );
13547 13569 %}
13548 13570
13549 13571 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13550 13572 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13551 13573 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13552 13574 ins_cost(250);
13553 13575 format %{ "CMOV$cmp $dst,$src" %}
13554 13576 opcode(0x0F,0x40);
13555 13577 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13556 13578 ins_pipe( pipe_cmov_mem );
13557 13579 %}
13558 13580
13559 13581 // Compare 2 longs and CMOVE ints.
13560 13582 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13561 13583 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13562 13584 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13563 13585 ins_cost(200);
13564 13586 format %{ "CMOV$cmp $dst,$src" %}
13565 13587 opcode(0x0F,0x40);
13566 13588 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13567 13589 ins_pipe( pipe_cmov_reg );
13568 13590 %}
13569 13591
13570 13592 // Compare 2 longs and CMOVE doubles
13571 13593 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13572 13594 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13573 13595 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13574 13596 ins_cost(200);
13575 13597 expand %{
13576 13598 fcmovD_regS(cmp,flags,dst,src);
13577 13599 %}
13578 13600 %}
13579 13601
13580 13602 // Compare 2 longs and CMOVE doubles
13581 13603 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
13582 13604 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13583 13605 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13584 13606 ins_cost(200);
13585 13607 expand %{
13586 13608 fcmovXD_regS(cmp,flags,dst,src);
13587 13609 %}
13588 13610 %}
13589 13611
// Compare 2 longs and CMOVE floats (FPU-stack floats, UseSSE==0).
13590 13612 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
// FIX: parenthesize the eq/ne disjunction. Without parens the predicate parsed as
// (UseSSE==0 && eq) || ne, so the 'ne' arm ignored the UseSSE guard.
13591 13613 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13592 13614 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13593 13615 ins_cost(200);
13594 13616 expand %{
13595 13617 fcmovF_regS(cmp,flags,dst,src);
13596 13618 %}
13597 13619 %}
13598 13620
// Compare 2 longs and CMOVE floats (XMM floats, UseSSE>=1).
13599 13621 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// FIX: parenthesize the eq/ne disjunction so the UseSSE>=1 guard applies to BOTH
// arms ('&&' binds tighter than '||'); matches the integer cmov*_EQNE rules.
13600 13622 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13601 13623 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13602 13624 ins_cost(200);
13603 13625 expand %{
13604 13626 fcmovX_regS(cmp,flags,dst,src);
13605 13627 %}
13606 13628 %}
13607 13629
13608 13630 //======
13609 13631 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13610 13632 // Same as cmpL_reg_flags_LEGT except must negate src
13611 13633 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13612 13634 match( Set flags (CmpL src zero ));
// tmp is zeroed and then destroyed by the CMP/SBB sequence.
13613 13635 effect( TEMP tmp );
13614 13636 ins_cost(300);
// Computes 0 - src (XOR zeroes tmp, CMP subtracts the low word, SBB the high word
// with borrow) so the flags reflect the commuted test, as the format note says.
13615 13637 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13616 13638 "CMP $tmp,$src.lo\n\t"
13617 13639 "SBB $tmp,$src.hi\n\t" %}
13618 13640 ins_encode( long_cmp_flags3(src, tmp) );
13619 13641 ins_pipe( ialu_reg_reg_long );
13620 13642 %}
13621 13643
13622 13644 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13623 13645 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13624 13646 // requires a commuted test to get the same result.
13625 13647 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13626 13648 match( Set flags (CmpL src1 src2 ));
// tmp holds src2.hi during the SBB and is clobbered.
13627 13649 effect( TEMP tmp );
13628 13650 ins_cost(300);
// CMP low words, then SBB high words (with borrow) of the SWAPPED operands;
// consumers must therefore use the commuted condition (LE/GT).
13629 13651 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13630 13652 "MOV $tmp,$src2.hi\n\t"
13631 13653 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13632 13654 ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13633 13655 ins_pipe( ialu_cr_reg_reg );
13634 13656 %}
13635 13657
13636 13658 // Long compares reg < zero/req OR reg >= zero/req.
13637 13659 // Just a wrapper for a normal branch, plus the predicate test
13638 13660 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13639 13661 match(If cmp flags);
13640 13662 effect(USE labl);
// Only handle GT/LE tests here; the flags were produced by the commuted
// cmpL_*_flags_LEGT rules above.
13641 13663 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13642 13664 ins_cost(300);
13643 13665 expand %{
13644 13666 jmpCon(cmp,flags,labl); // JGT or JLE...
13645 13667 %}
13646 13668 %}
13647 13669
13648 13670 // Compare 2 longs and CMOVE longs.
13649 13671 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13650 13672 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13651 13673 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13652 13674 ins_cost(400);
// 64-bit conditional move done as two 32-bit CMOVs (lo then hi) on one condition.
13653 13675 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13654 13676 "CMOV$cmp $dst.hi,$src.hi" %}
13655 13677 opcode(0x0F,0x40);
13656 13678 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13657 13679 ins_pipe( pipe_cmov_reg_long );
13658 13680 %}
13659 13681
// Same as cmovLL_reg_LEGT but the source long comes from memory (two CMOV loads,
// the high half at offset +4).
13660 13682 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13661 13683 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13662 13684 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13663 13685 ins_cost(500);
13664 13686 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13665 13687 "CMOV$cmp $dst.hi,$src.hi+4" %}
13666 13688 opcode(0x0F,0x40);
13667 13689 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13668 13690 ins_pipe( pipe_cmov_reg_long );
13669 13691 %}
13670 13692
13671 13693 // Compare 2 longs and CMOVE ints.
13672 13694 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
// LE/GT-only variant: flags come from the commuted long-compare rules above.
13673 13695 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13674 13696 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13675 13697 ins_cost(200);
13676 13698 format %{ "CMOV$cmp $dst,$src" %}
13677 13699 opcode(0x0F,0x40);
13678 13700 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13679 13701 ins_pipe( pipe_cmov_reg );
13680 13702 %}
13681 13703
// Memory-source variant of cmovII_reg_LEGT (CMOV can load directly from memory).
13682 13704 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13683 13705 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13684 13706 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13685 13707 ins_cost(250);
13686 13708 format %{ "CMOV$cmp $dst,$src" %}
13687 13709 opcode(0x0F,0x40);
13688 13710 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13689 13711 ins_pipe( pipe_cmov_mem );
13690 13712 %}
13691 13713
13692 13714 // Compare 2 longs and CMOVE ptrs.
13693 13715 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
// Pointer version of cmovII_reg_LEGT; same LE/GT-only predicate.
13694 13716 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13695 13717 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13696 13718 ins_cost(200);
13697 13719 format %{ "CMOV$cmp $dst,$src" %}
13698 13720 opcode(0x0F,0x40);
13699 13721 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13700 13722 ins_pipe( pipe_cmov_reg );
13701 13723 %}
13702 13724
13703 13725 // Compare 2 longs and CMOVE doubles (FPU-stack doubles, UseSSE<=1).
13704 13726 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// FIX: parenthesize the le/gt disjunction. '&&' binds tighter than '||', so the
// original predicate parsed as (UseSSE<=1 && le) || gt, letting the 'gt' arm match
// even when UseSSE>1. Now consistent with the integer cmov*_LEGT rules above.
13705 13727 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13706 13728 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13707 13729 ins_cost(200);
13708 13730 expand %{
13709 13731 fcmovD_regS(cmp,flags,dst,src);
13710 13732 %}
13711 13733 %}
13712 13734
13713 13735 // Compare 2 longs and CMOVE doubles (XMM doubles, UseSSE>=2).
13714 13736 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE>=2 guard applies to BOTH
// arms (previously (UseSSE>=2 && le) || gt due to operator precedence).
13715 13737 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13716 13738 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13717 13739 ins_cost(200);
13718 13740 expand %{
13719 13741 fcmovXD_regS(cmp,flags,dst,src);
13720 13742 %}
13721 13743 %}
13722 13744
// Compare 2 longs and CMOVE floats (FPU-stack floats, UseSSE==0).
13723 13745 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE==0 guard applies to BOTH
// arms (previously (UseSSE==0 && le) || gt due to operator precedence).
13724 13746 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13725 13747 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13726 13748 ins_cost(200);
13727 13749 expand %{
13728 13750 fcmovF_regS(cmp,flags,dst,src);
13729 13751 %}
13730 13752 %}
13731 13753
13732 13754
// Compare 2 longs and CMOVE floats (XMM floats, UseSSE>=1).
13733 13755 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE>=1 guard applies to BOTH
// arms (previously (UseSSE>=1 && le) || gt due to operator precedence).
13734 13756 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13735 13757 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13736 13758 ins_cost(200);
13737 13759 expand %{
13738 13760 fcmovX_regS(cmp,flags,dst,src);
13739 13761 %}
13740 13762 %}
13741 13763
13742 13764
13743 13765 // ============================================================================
13744 13766 // Procedure Call/Return Instructions
13745 13767 // Call Java Static Instruction
13746 13768 // Note: If this code changes, the corresponding ret_addr_offset() and
13747 13769 // compute_padding() functions will have to be adjusted.
13748 13770 instruct CallStaticJavaDirect(method meth) %{
13749 13771 match(CallStaticJava);
// Only non-MethodHandle static calls; CallStaticJavaHandle covers the MH case.
13750 13772 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13751 13773 effect(USE meth);
13752 13774
13753 13775 ins_cost(300);
13754 13776 format %{ "CALL,static " %}
// 0xE8 = x86 near CALL with 32-bit relative displacement.
13755 13777 opcode(0xE8); /* E8 cd */
13756 13778 ins_encode( pre_call_FPU,
13757 13779 Java_Static_Call( meth ),
13758 13780 call_epilog,
13759 13781 post_call_FPU );
13760 13782 ins_pipe( pipe_slow );
13761 13783 ins_pc_relative(1);
13762 13784 ins_alignment(4);
13763 13785 %}
13764 13786
13765 13787 // Call Java Static Instruction (method handle version)
13766 13788 // Note: If this code changes, the corresponding ret_addr_offset() and
13767 13789 // compute_padding() functions will have to be adjusted.
13768 13790 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13769 13791 match(CallStaticJava);
// Complement of CallStaticJavaDirect's predicate: MethodHandle invokes only.
13770 13792 predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13771 13793 effect(USE meth);
13772 13794 // EBP is saved by all callees (for interpreter stack correction).
13773 13795 // We use it here for a similar purpose, in {preserve,restore}_SP.
13774 13796
13775 13797 ins_cost(300);
13776 13798 format %{ "CALL,static/MethodHandle " %}
13777 13799 opcode(0xE8); /* E8 cd */
// Same as the plain static call but brackets it with SP save/restore because
// a MethodHandle call may rewrite the stack pointer.
13778 13800 ins_encode( pre_call_FPU,
13779 13801 preserve_SP,
13780 13802 Java_Static_Call( meth ),
13781 13803 restore_SP,
13782 13804 call_epilog,
13783 13805 post_call_FPU );
13784 13806 ins_pipe( pipe_slow );
13785 13807 ins_pc_relative(1);
13786 13808 ins_alignment(4);
13787 13809 %}
13788 13810
13789 13811 // Call Java Dynamic Instruction
13790 13812 // Note: If this code changes, the corresponding ret_addr_offset() and
13791 13813 // compute_padding() functions will have to be adjusted.
13792 13814 instruct CallDynamicJavaDirect(method meth) %{
13793 13815 match(CallDynamicJava);
13794 13816 effect(USE meth);
13795 13817
13796 13818 ins_cost(300);
// The format shows EAX pre-loaded with a sentinel oop (-1); per the format string
// this is part of the inline-cache calling convention emitted by Java_Dynamic_Call.
13797 13819 format %{ "MOV EAX,(oop)-1\n\t"
13798 13820 "CALL,dynamic" %}
13799 13821 opcode(0xE8); /* E8 cd */
13800 13822 ins_encode( pre_call_FPU,
13801 13823 Java_Dynamic_Call( meth ),
13802 13824 call_epilog,
13803 13825 post_call_FPU );
13804 13826 ins_pipe( pipe_slow );
13805 13827 ins_pc_relative(1);
13806 13828 ins_alignment(4);
13807 13829 %}
13808 13830
13809 13831 // Call Runtime Instruction
13810 13832 instruct CallRuntimeDirect(method meth) %{
13811 13833 match(CallRuntime );
13812 13834 effect(USE meth);
13813 13835
13814 13836 ins_cost(300);
13815 13837 format %{ "CALL,runtime " %}
13816 13838 opcode(0xE8); /* E8 cd */
13817 13839 // Use FFREEs to clear entries in float stack
// Unlike CallLeafDirect below, no FPU verification after return (safepoint call).
13818 13840 ins_encode( pre_call_FPU,
13819 13841 FFree_Float_Stack_All,
13820 13842 Java_To_Runtime( meth ),
13821 13843 post_call_FPU );
13822 13844 ins_pipe( pipe_slow );
13823 13845 ins_pc_relative(1);
13824 13846 %}
13825 13847
13826 13848 // Call runtime without safepoint
13827 13849 instruct CallLeafDirect(method meth) %{
13828 13850 match(CallLeaf);
13829 13851 effect(USE meth);
13830 13852
13831 13853 ins_cost(300);
13832 13854 format %{ "CALL_LEAF,runtime " %}
13833 13855 opcode(0xE8); /* E8 cd */
// Leaf call: FPU stack is cleared before the call and verified after it.
13834 13856 ins_encode( pre_call_FPU,
13835 13857 FFree_Float_Stack_All,
13836 13858 Java_To_Runtime( meth ),
13837 13859 Verify_FPU_For_Leaf, post_call_FPU );
13838 13860 ins_pipe( pipe_slow );
13839 13861 ins_pc_relative(1);
13840 13862 %}
13841 13863
// Leaf runtime call that is known not to touch the FPU: no pre/post FPU
// bookkeeping at all, just the raw call.
13842 13864 instruct CallLeafNoFPDirect(method meth) %{
13843 13865 match(CallLeafNoFP);
13844 13866 effect(USE meth);
13845 13867
13846 13868 ins_cost(300);
13847 13869 format %{ "CALL_LEAF_NOFP,runtime " %}
13848 13870 opcode(0xE8); /* E8 cd */
13849 13871 ins_encode(Java_To_Runtime(meth));
13850 13872 ins_pipe( pipe_slow );
13851 13873 ins_pc_relative(1);
13852 13874 %}
13853 13875
13854 13876
13855 13877 // Return Instruction
13856 13878 // Remove the return address & jump to it.
13857 13879 instruct Ret() %{
13858 13880 match(Return);
13859 13881 format %{ "RET" %}
// 0xC3 = x86 near RET; single opcode byte, no operands.
13860 13882 opcode(0xC3);
13861 13883 ins_encode(OpcP);
13862 13884 ins_pipe( pipe_jmp );
13863 13885 %}
13864 13886
13865 13887 // Tail Call; Jump from runtime stub to Java code.
13866 13888 // Also known as an 'interprocedural jump'.
13867 13889 // Target of jump will eventually return to caller.
13868 13890 // TailJump below removes the return address.
13869 13891 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13870 13892 match(TailCall jump_target method_oop );
13871 13893 ins_cost(300);
13872 13894 format %{ "JMP $jump_target \t# EBX holds method oop" %}
// FF /4 = indirect JMP through a register; return address stays on the stack.
13873 13895 opcode(0xFF, 0x4); /* Opcode FF /4 */
13874 13896 ins_encode( OpcP, RegOpc(jump_target) );
13875 13897 ins_pipe( pipe_jmp );
13876 13898 %}
13877 13899
13878 13900
13879 13901 // Tail Jump; remove the return address; jump to target.
13880 13902 // TailCall above leaves the return address around.
13881 13903 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13882 13904 match( TailJump jump_target ex_oop );
13883 13905 ins_cost(300);
// Pops the return address into EDX (discarded) before the indirect jump;
// the exception oop travels in EAX per the operand constraint.
13884 13906 format %{ "POP EDX\t# pop return address into dummy\n\t"
13885 13907 "JMP $jump_target " %}
13886 13908 opcode(0xFF, 0x4); /* Opcode FF /4 */
13887 13909 ins_encode( enc_pop_rdx,
13888 13910 OpcP, RegOpc(jump_target) );
13889 13911 ins_pipe( pipe_jmp );
13890 13912 %}
13891 13913
13892 13914 // Create exception oop: created by stack-crawling runtime code.
13893 13915 // Created exception is now available to this handler, and is setup
13894 13916 // just prior to jumping to this handler. No code emitted.
13895 13917 instruct CreateException( eAXRegP ex_oop )
13896 13918 %{
13897 13919 match(Set ex_oop (CreateEx));
13898 13920
// size(0): a register-allocation artifact only — the runtime has already
// deposited the oop in EAX before control reaches here.
13899 13921 size(0);
13900 13922 // use the following format syntax
13901 13923 format %{ "# exception oop is in EAX; no code emitted" %}
13902 13924 ins_encode();
13903 13925 ins_pipe( empty );
13904 13926 %}
13905 13927
13906 13928
13907 13929 // Rethrow exception:
13908 13930 // The exception oop will come in the first argument position.
13909 13931 // Then JUMP (not call) to the rethrow stub code.
13910 13932 instruct RethrowException()
13911 13933 %{
13912 13934 match(Rethrow);
13913 13935
13914 13936 // use the following format syntax
13915 13937 format %{ "JMP rethrow_stub" %}
// enc_rethrow (defined elsewhere in this file) presumably emits the jump to the
// shared rethrow stub — confirm in the encode block.
13916 13938 ins_encode(enc_rethrow);
13917 13939 ins_pipe( pipe_jmp );
13918 13940 %}
13919 13941
13920 13942 // inlined locking and unlocking
13921 13943
13922 13944
// Inline fast-path monitor enter; flags hold the success/failure result.
13923 13945 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13924 13946 match( Set cr (FastLock object box) );
// tmp is pinned to EAX (eAXRegI) and both temps are clobbered, as the format notes.
13925 13947 effect( TEMP tmp, TEMP scr );
13926 13948 ins_cost(300);
13927 13949 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13928 13950 ins_encode( Fast_Lock(object,box,tmp,scr) );
13929 13951 ins_pipe( pipe_slow );
13930 13952 ins_pc_relative(1);
13931 13953 %}
13932 13954
// Inline fast-path monitor exit; counterpart of cmpFastLock above.
13933 13955 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13934 13956 match( Set cr (FastUnlock object box) );
// box is pinned to EAX (eAXRegP); tmp is a clobbered scratch register.
13935 13957 effect( TEMP tmp );
13936 13958 ins_cost(300);
13937 13959 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13938 13960 ins_encode( Fast_Unlock(object,box,tmp) );
13939 13961 ins_pipe( pipe_slow );
13940 13962 ins_pc_relative(1);
13941 13963 %}
13942 13964
13943 13965
13944 13966
13945 13967 // ============================================================================
13946 13968 // Safepoint Instruction
13947 13969 instruct safePoint_poll(eFlagsReg cr) %{
13948 13970 match(SafePoint);
// The TST read sets flags, hence KILL cr.
13949 13971 effect(KILL cr);
13950 13972
13951 13973 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13952 13974 // On SPARC that might be acceptable as we can generate the address with
13953 13975 // just a sethi, saving an or. By polling at offset 0 we can end up
13954 13976 // putting additional pressure on the index-0 in the D$. Because of
13955 13977 // alignment (just like the situation at hand) the lower indices tend
13956 13978 // to see more traffic. It'd be better to change the polling address
13957 13979 // to offset 0 of the last $line in the polling page.
13958 13980
// NOTE(review): Safepoint_Poll presumably tests a word of the VM polling page so
// the OS can trap the thread at a safepoint — confirm in the enc_class definition.
13959 13981 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13960 13982 ins_cost(125);
13961 13983 size(6) ;
13962 13984 ins_encode( Safepoint_Poll() );
13963 13985 ins_pipe( ialu_reg_mem );
13964 13986 %}
13965 13987
13966 13988 //----------PEEPHOLE RULES-----------------------------------------------------
13967 13989 // These must follow all instruction definitions as they use the names
13968 13990 // defined in the instructions definitions.
13969 13991 //
13970 13992 // peepmatch ( root_instr_name [preceding_instruction]* );
13971 13993 //
13972 13994 // peepconstraint %{
13973 13995 // (instruction_number.operand_name relational_op instruction_number.operand_name
13974 13996 // [, ...] );
13975 13997 // // instruction numbers are zero-based using left to right order in peepmatch
13976 13998 //
13977 13999 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13978 14000 // // provide an instruction_number.operand_name for each operand that appears
13979 14001 // // in the replacement instruction's match rule
13980 14002 //
13981 14003 // ---------VM FLAGS---------------------------------------------------------
13982 14004 //
13983 14005 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13984 14006 //
13985 14007 // Each peephole rule is given an identifying number starting with zero and
13986 14008 // increasing by one in the order seen by the parser. An individual peephole
13987 14009 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13988 14010 // on the command-line.
13989 14011 //
13990 14012 // ---------CURRENT LIMITATIONS----------------------------------------------
13991 14013 //
13992 14014 // Only match adjacent instructions in same basic block
13993 14015 // Only equality constraints
13994 14016 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13995 14017 // Only one replacement instruction
13996 14018 //
13997 14019 // ---------EXAMPLE----------------------------------------------------------
13998 14020 //
13999 14021 // // pertinent parts of existing instructions in architecture description
14000 14022 // instruct movI(eRegI dst, eRegI src) %{
14001 14023 // match(Set dst (CopyI src));
14002 14024 // %}
14003 14025 //
14004 14026 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14005 14027 // match(Set dst (AddI dst src));
14006 14028 // effect(KILL cr);
14007 14029 // %}
14008 14030 //
14009 14031 // // Change (inc mov) to lea
14010 14032 // peephole %{
14011 14033 // // increment preceeded by register-register move
14012 14034 // peepmatch ( incI_eReg movI );
14013 14035 // // require that the destination register of the increment
14014 14036 // // match the destination register of the move
14015 14037 // peepconstraint ( 0.dst == 1.dst );
14016 14038 // // construct a replacement instruction that sets
14017 14039 // // the destination to ( move's source register + one )
14018 14040 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14019 14041 // %}
14020 14042 //
14021 14043 // Implementation no longer uses movX instructions since
14022 14044 // machine-independent system no longer uses CopyX nodes.
14023 14045 //
14024 14046 // peephole %{
14025 14047 // peepmatch ( incI_eReg movI );
14026 14048 // peepconstraint ( 0.dst == 1.dst );
14027 14049 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14028 14050 // %}
14029 14051 //
14030 14052 // peephole %{
14031 14053 // peepmatch ( decI_eReg movI );
14032 14054 // peepconstraint ( 0.dst == 1.dst );
14033 14055 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14034 14056 // %}
14035 14057 //
14036 14058 // peephole %{
14037 14059 // peepmatch ( addI_eReg_imm movI );
14038 14060 // peepconstraint ( 0.dst == 1.dst );
14039 14061 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14040 14062 // %}
14041 14063 //
14042 14064 // peephole %{
14043 14065 // peepmatch ( addP_eReg_imm movP );
14044 14066 // peepconstraint ( 0.dst == 1.dst );
14045 14067 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14046 14068 // %}
14047 14069
14048 14070 // // Change load of spilled value to only a spill
14049 14071 // instruct storeI(memory mem, eRegI src) %{
14050 14072 // match(Set mem (StoreI mem src));
14051 14073 // %}
14052 14074 //
14053 14075 // instruct loadI(eRegI dst, memory mem) %{
14054 14076 // match(Set dst (LoadI mem));
14055 14077 // %}
14056 14078 //
// Peephole: a load of a value just stored back from the same memory slot is
// redundant — drop the load and keep only the store (see example rules above).
14057 14079 peephole %{
14058 14080 peepmatch ( loadI storeI );
14059 14081 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14060 14082 peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14061 14083 %}
14062 14084
14063 14085 //----------SMARTSPILL RULES---------------------------------------------------
14064 14086 // These must follow all instruction definitions as they use the names
14065 14087 // defined in the instructions definitions.
↓ open down ↓ |
1104 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX