Print this page
rev 1839 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 // or visit www.oracle.com if you need additional information or have any
21 21 // questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // architecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok so here's the trick FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers of 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instruction out of Intel's FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source_hpp %{
239 239 // Must be visible to the DFA in dfa_x86_32.cpp
240 240 extern bool is_operand_hi32_zero(Node* n);
241 241 %}
242 242
243 243 source %{
244 244 #define RELOC_IMM32 Assembler::imm_operand
245 245 #define RELOC_DISP32 Assembler::disp32_operand
246 246
247 247 #define __ _masm.
248 248
249 249 // How to find the high register of a Long pair, given the low register
250 250 #define HIGH_FROM_LOW(x) ((x)+2)
251 251
252 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 254 // fast versions of NegF/NegD and AbsF/AbsD.
255 255
256 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
257 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 259 // of 128-bits operands for SSE instructions.
260 260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 261 // Store the value to a 128-bits operand.
262 262 operand[0] = lo;
263 263 operand[1] = hi;
264 264 return operand;
265 265 }
266 266
267 267 // Buffer for 128-bits masks used by SSE instructions.
268 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269 269
270 270 // Static initialization during VM startup.
271 271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275 275
276 276 // Offset hacking within calls.
277 277 static int pre_call_FPU_size() {
278 278 if (Compile::current()->in_24_bit_fp_mode())
279 279 return 6; // fldcw
280 280 return 0;
281 281 }
282 282
283 283 static int preserve_SP_size() {
284 284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 285 }
286 286
287 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 288 // from the start of the call to the point where the return address
289 289 // will point.
290 290 int MachCallStaticJavaNode::ret_addr_offset() {
291 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 292 if (_method_handle_invoke)
293 293 offset += preserve_SP_size();
294 294 return offset;
295 295 }
296 296
297 297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 299 }
300 300
301 301 static int sizeof_FFree_Float_Stack_All = -1;
302 302
303 303 int MachCallRuntimeNode::ret_addr_offset() {
304 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306 306 }
307 307
308 308 // Indicate if the safepoint node needs the polling page as an input.
309 309 // Since x86 does have absolute addressing, it doesn't.
310 310 bool SafePointNode::needs_polling_address_input() {
311 311 return false;
312 312 }
313 313
314 314 //
315 315 // Compute padding required for nodes which need alignment
316 316 //
317 317
318 318 // The address of the call instruction needs to be 4-byte aligned to
319 319 // ensure that it does not span a cache line so that it can be patched.
320 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 322 current_offset += 1; // skip call opcode byte
323 323 return round_to(current_offset, alignment_required()) - current_offset;
324 324 }
325 325
326 326 // The address of the call instruction needs to be 4-byte aligned to
327 327 // ensure that it does not span a cache line so that it can be patched.
328 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 331 current_offset += 1; // skip call opcode byte
332 332 return round_to(current_offset, alignment_required()) - current_offset;
333 333 }
334 334
335 335 // The address of the call instruction needs to be 4-byte aligned to
336 336 // ensure that it does not span a cache line so that it can be patched.
337 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 339 current_offset += 5; // skip MOV instruction
340 340 current_offset += 1; // skip call opcode byte
341 341 return round_to(current_offset, alignment_required()) - current_offset;
342 342 }
343 343
344 344 #ifndef PRODUCT
345 345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
346 346 st->print("INT3");
347 347 }
348 348 #endif
349 349
350 350 // EMIT_RM()
351 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
352 352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
353 353 cbuf.insts()->emit_int8(c);
354 354 }
355 355
356 356 // EMIT_CC()
357 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
358 358 unsigned char c = (unsigned char)( f1 | f2 );
359 359 cbuf.insts()->emit_int8(c);
360 360 }
361 361
362 362 // EMIT_OPCODE()
363 363 void emit_opcode(CodeBuffer &cbuf, int code) {
364 364 cbuf.insts()->emit_int8((unsigned char) code);
365 365 }
366 366
367 367 // EMIT_OPCODE() w/ relocation information
368 368 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
369 369 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
370 370 emit_opcode(cbuf, code);
371 371 }
372 372
373 373 // EMIT_D8()
374 374 void emit_d8(CodeBuffer &cbuf, int d8) {
375 375 cbuf.insts()->emit_int8((unsigned char) d8);
376 376 }
377 377
378 378 // EMIT_D16()
379 379 void emit_d16(CodeBuffer &cbuf, int d16) {
380 380 cbuf.insts()->emit_int16(d16);
381 381 }
382 382
383 383 // EMIT_D32()
384 384 void emit_d32(CodeBuffer &cbuf, int d32) {
385 385 cbuf.insts()->emit_int32(d32);
386 386 }
387 387
388 388 // emit 32 bit value and construct relocation entry from relocInfo::relocType
389 389 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
390 390 int format) {
391 391 cbuf.relocate(cbuf.insts_mark(), reloc, format);
392 392 cbuf.insts()->emit_int32(d32);
393 393 }
394 394
395 395 // emit 32 bit value and construct relocation entry from RelocationHolder
396 396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
397 397 int format) {
398 398 #ifdef ASSERT
399 399 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
400 400 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
401 401 }
402 402 #endif
403 403 cbuf.relocate(cbuf.insts_mark(), rspec, format);
404 404 cbuf.insts()->emit_int32(d32);
405 405 }
406 406
407 407 // Access stack slot for load or store
408 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
409 409 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
410 410 if( -128 <= disp && disp <= 127 ) {
411 411 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
412 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
413 413 emit_d8 (cbuf, disp); // Displacement // R/M byte
414 414 } else {
415 415 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
416 416 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
417 417 emit_d32(cbuf, disp); // Displacement // R/M byte
418 418 }
419 419 }
420 420
421 421 // eRegI ereg, memory mem) %{ // emit_reg_mem
422 422 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
423 423 // There is no index & no scale, use form without SIB byte
424 424 if ((index == 0x4) &&
425 425 (scale == 0) && (base != ESP_enc)) {
426 426 // If no displacement, mode is 0x0; unless base is [EBP]
427 427 if ( (displace == 0) && (base != EBP_enc) ) {
428 428 emit_rm(cbuf, 0x0, reg_encoding, base);
429 429 }
430 430 else { // If 8-bit displacement, mode 0x1
431 431 if ((displace >= -128) && (displace <= 127)
432 432 && !(displace_is_oop) ) {
433 433 emit_rm(cbuf, 0x1, reg_encoding, base);
434 434 emit_d8(cbuf, displace);
435 435 }
436 436 else { // If 32-bit displacement
437 437 if (base == -1) { // Special flag for absolute address
438 438 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
439 439 // (manual lies; no SIB needed here)
440 440 if ( displace_is_oop ) {
441 441 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
442 442 } else {
443 443 emit_d32 (cbuf, displace);
444 444 }
445 445 }
446 446 else { // Normal base + offset
447 447 emit_rm(cbuf, 0x2, reg_encoding, base);
448 448 if ( displace_is_oop ) {
449 449 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
450 450 } else {
451 451 emit_d32 (cbuf, displace);
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457 else { // Else, encode with the SIB byte
458 458 // If no displacement, mode is 0x0; unless base is [EBP]
459 459 if (displace == 0 && (base != EBP_enc)) { // If no displacement
460 460 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
461 461 emit_rm(cbuf, scale, index, base);
462 462 }
463 463 else { // If 8-bit displacement, mode 0x1
464 464 if ((displace >= -128) && (displace <= 127)
465 465 && !(displace_is_oop) ) {
466 466 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
467 467 emit_rm(cbuf, scale, index, base);
468 468 emit_d8(cbuf, displace);
469 469 }
470 470 else { // If 32-bit displacement
471 471 if (base == 0x04 ) {
472 472 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
473 473 emit_rm(cbuf, scale, index, 0x04);
474 474 } else {
475 475 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
476 476 emit_rm(cbuf, scale, index, base);
477 477 }
478 478 if ( displace_is_oop ) {
479 479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 480 } else {
481 481 emit_d32 (cbuf, displace);
482 482 }
483 483 }
484 484 }
485 485 }
486 486 }
487 487
488 488
489 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 490 if( dst_encoding == src_encoding ) {
491 491 // reg-reg copy, use an empty encoding
492 492 } else {
493 493 emit_opcode( cbuf, 0x8B );
494 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 495 }
496 496 }
497 497
498 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 499 if( dst_encoding == src_encoding ) {
↓ open down ↓ |
499 lines elided |
↑ open up ↑ |
500 500 // reg-reg copy, use an empty encoding
501 501 } else {
502 502 MacroAssembler _masm(&cbuf);
503 503
504 504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 505 }
506 506 }
507 507
508 508
509 509 //=============================================================================
510 +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511 +
512 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
513 + // Empty encoding
514 +}
515 +
516 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
517 + return 0;
518 +}
519 +
520 +#ifndef PRODUCT
521 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
522 + st->print("# MachConstantBaseNode (empty encoding)");
523 +}
524 +#endif
525 +
526 +
527 +//=============================================================================
510 528 #ifndef PRODUCT
511 529 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
512 530 Compile* C = ra_->C;
513 531 if( C->in_24_bit_fp_mode() ) {
514 532 st->print("FLDCW 24 bit fpu control word");
515 533 st->print_cr(""); st->print("\t");
516 534 }
517 535
518 536 int framesize = C->frame_slots() << LogBytesPerInt;
519 537 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
520 538 // Remove two words for return addr and rbp,
521 539 framesize -= 2*wordSize;
522 540
523 541 // Calls to C2R adapters often do not accept exceptional returns.
524 542 // We require that their callers must bang for them. But be careful, because
525 543 // some VM calls (such as call site linkage) can use several kilobytes of
526 544 // stack. But the stack safety zone should account for that.
527 545 // See bugs 4446381, 4468289, 4497237.
528 546 if (C->need_stack_bang(framesize)) {
529 547 st->print_cr("# stack bang"); st->print("\t");
530 548 }
531 549 st->print_cr("PUSHL EBP"); st->print("\t");
532 550
533 551 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
534 552 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
535 553 st->print_cr(""); st->print("\t");
536 554 framesize -= wordSize;
537 555 }
538 556
539 557 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
540 558 if (framesize) {
541 559 st->print("SUB ESP,%d\t# Create frame",framesize);
542 560 }
543 561 } else {
544 562 st->print("SUB ESP,%d\t# Create frame",framesize);
545 563 }
546 564 }
547 565 #endif
548 566
549 567
550 568 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
551 569 Compile* C = ra_->C;
552 570
553 571 if (UseSSE >= 2 && VerifyFPU) {
554 572 MacroAssembler masm(&cbuf);
555 573 masm.verify_FPU(0, "FPU stack must be clean on entry");
556 574 }
557 575
558 576 // WARNING: Initial instruction MUST be 5 bytes or longer so that
559 577 // NativeJump::patch_verified_entry will be able to patch out the entry
560 578 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
561 579 // depth is ok at 5 bytes, the frame allocation can be either 3 or
562 580 // 6 bytes. So if we don't do the fldcw or the push then we must
563 581 // use the 6 byte frame allocation even if we have no frame. :-(
564 582 // If method sets FPU control word do it now
565 583 if( C->in_24_bit_fp_mode() ) {
566 584 MacroAssembler masm(&cbuf);
567 585 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
568 586 }
569 587
570 588 int framesize = C->frame_slots() << LogBytesPerInt;
571 589 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
572 590 // Remove two words for return addr and rbp,
573 591 framesize -= 2*wordSize;
574 592
575 593 // Calls to C2R adapters often do not accept exceptional returns.
576 594 // We require that their callers must bang for them. But be careful, because
577 595 // some VM calls (such as call site linkage) can use several kilobytes of
578 596 // stack. But the stack safety zone should account for that.
579 597 // See bugs 4446381, 4468289, 4497237.
580 598 if (C->need_stack_bang(framesize)) {
581 599 MacroAssembler masm(&cbuf);
582 600 masm.generate_stack_overflow_check(framesize);
583 601 }
584 602
585 603 // We always push rbp, so that on return to interpreter rbp, will be
586 604 // restored correctly and we can correct the stack.
587 605 emit_opcode(cbuf, 0x50 | EBP_enc);
588 606
589 607 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
590 608 emit_opcode(cbuf, 0x68); // push 0xbadb100d
591 609 emit_d32(cbuf, 0xbadb100d);
592 610 framesize -= wordSize;
593 611 }
594 612
595 613 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
596 614 if (framesize) {
597 615 emit_opcode(cbuf, 0x83); // sub SP,#framesize
598 616 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
599 617 emit_d8(cbuf, framesize);
600 618 }
601 619 } else {
602 620 emit_opcode(cbuf, 0x81); // sub SP,#framesize
603 621 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
604 622 emit_d32(cbuf, framesize);
605 623 }
606 624 C->set_frame_complete(cbuf.insts_size());
607 625
608 626 #ifdef ASSERT
609 627 if (VerifyStackAtCalls) {
610 628 Label L;
611 629 MacroAssembler masm(&cbuf);
612 630 masm.push(rax);
613 631 masm.mov(rax, rsp);
614 632 masm.andptr(rax, StackAlignmentInBytes-1);
615 633 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
616 634 masm.pop(rax);
617 635 masm.jcc(Assembler::equal, L);
618 636 masm.stop("Stack is not properly aligned!");
619 637 masm.bind(L);
620 638 }
621 639 #endif
622 640
623 641 }
624 642
625 643 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
626 644 return MachNode::size(ra_); // too many variables; just compute it the hard way
627 645 }
628 646
629 647 int MachPrologNode::reloc() const {
630 648 return 0; // a large enough number
631 649 }
632 650
633 651 //=============================================================================
634 652 #ifndef PRODUCT
635 653 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
636 654 Compile *C = ra_->C;
637 655 int framesize = C->frame_slots() << LogBytesPerInt;
638 656 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
639 657 // Remove two words for return addr and rbp,
640 658 framesize -= 2*wordSize;
641 659
642 660 if( C->in_24_bit_fp_mode() ) {
643 661 st->print("FLDCW standard control word");
644 662 st->cr(); st->print("\t");
645 663 }
646 664 if( framesize ) {
647 665 st->print("ADD ESP,%d\t# Destroy frame",framesize);
648 666 st->cr(); st->print("\t");
649 667 }
650 668 st->print_cr("POPL EBP"); st->print("\t");
651 669 if( do_polling() && C->is_method_compilation() ) {
652 670 st->print("TEST PollPage,EAX\t! Poll Safepoint");
653 671 st->cr(); st->print("\t");
654 672 }
655 673 }
656 674 #endif
657 675
658 676 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
659 677 Compile *C = ra_->C;
660 678
661 679 // If method set FPU control word, restore to standard control word
662 680 if( C->in_24_bit_fp_mode() ) {
663 681 MacroAssembler masm(&cbuf);
664 682 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
665 683 }
666 684
667 685 int framesize = C->frame_slots() << LogBytesPerInt;
668 686 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
669 687 // Remove two words for return addr and rbp,
670 688 framesize -= 2*wordSize;
671 689
672 690 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
673 691
674 692 if( framesize >= 128 ) {
675 693 emit_opcode(cbuf, 0x81); // add SP, #framesize
676 694 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
677 695 emit_d32(cbuf, framesize);
678 696 }
679 697 else if( framesize ) {
680 698 emit_opcode(cbuf, 0x83); // add SP, #framesize
681 699 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
682 700 emit_d8(cbuf, framesize);
683 701 }
684 702
685 703 emit_opcode(cbuf, 0x58 | EBP_enc);
686 704
687 705 if( do_polling() && C->is_method_compilation() ) {
688 706 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
689 707 emit_opcode(cbuf,0x85);
690 708 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
691 709 emit_d32(cbuf, (intptr_t)os::get_polling_page());
692 710 }
693 711 }
694 712
// Compute the exact byte size of the epilog emitted above:
// 6 bytes for the optional FLDCW, 6 for the optional return poll
// (TEST opcode + modrm + 4-byte address), 1 for POP EBP, and 6 or 3
// for the 32-bit / 8-bit immediate ADD ESP forms respectively.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if( do_polling() && C->is_method_compilation() ) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6; // opcode + modrm + d32
  } else {
    size += framesize ? 3 : 0; // opcode + modrm + d8 (nothing if frame is empty)
  }
  return size;
}
715 733
// Upper bound on relocation entries needed by the epilog.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

// The epilog uses the default pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Offset of the safepoint poll within this node; 0 here.
// NOTE(review): presumably unused unless the poll is the node's first
// instruction — confirm against callers of safepoint_offset().
int MachEpilogNode::safepoint_offset() const { return 0; }
725 743
726 744 //=============================================================================
727 745
// Register classes used by the spill-copy logic: invalid, general-purpose
// integer register, x87 float register, XMM register, or stack slot.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Classify an allocator register name into one of the classes above.
// Order matters: validity first, then stack slots, then machine registers.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    // x87 registers are only allocated when SSE2 is unavailable.
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
743 761
// Tri-modal helper for a single reg<->[ESP+offset] instruction:
// with 'cbuf' it emits bytes; with no buffer and !do_size it prints assembly
// to 'st' (non-PRODUCT builds); in all modes it returns the accumulated code
// size so the size() methods can stay in sync with emission.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + modrm + SIB = 3 bytes, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
763 781
// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emits/prints/sizes MOVSD/MOVLPD (double) or MOVSS (single) between an XMM
// register and [ESP+offset]; same tri-modal convention as impl_helper().
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    // Adjacent hi/lo register pair means a 64-bit (double) move.
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 ); // MOVSS prefix
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0F + opcode + modrm + SIB = 5 bytes, plus displacement.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
802 820
803 821
// XMM-to-XMM register copy.  Uses MOVAPS/MOVAPD when the platform prefers
// whole-register moves (UseXmmRegToRegMoveAll), else MOVSS/MOVSD.
// Same tri-modal emit/print/size convention as the helpers above.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 ); // MOVAPD needs the 0x66 prefix
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // MOVAPD = 4 bytes (with prefix), MOVAPS = 3 bytes.
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // prefix + 0F + opcode + modrm = 4 bytes for both MOVSD and MOVSS.
    return size+4;
  }
}
844 862
// Copy a 32-bit general-purpose register into an XMM register (MOVD xmm,gpr;
// encoding 66 0F 6E /r).  NOTE(review): unlike the other impl_* helpers this
// returns a fixed 4 rather than size+4 and ignores the incoming 'size' —
// callers only use it as the sole/terminal move, so the accumulated size is
// zero at that point; verify before reusing in a new context.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4; // 66 + 0F + 6E + modrm
}
860 878
861 879
// Copy an XMM register into a 32-bit general-purpose register (MOVD gpr,xmm;
// encoding 66 0F 7E /r — note the reg field carries the XMM source).
// Same fixed-size-4 convention (and caveat) as impl_movgpr2x_helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4; // 66 + 0F + 7E + modrm
}
877 895
// Integer register-to-register copy (MOV dst,src; 8B /r).
// Tri-modal emit/print/size like the other impl_* helpers.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + modrm
}
890 908
// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0), an FLD first pushes it to the top of the stack, and the store then
// pops (FSTP); if it is ST(0), a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // Encode store-with-pop vs plain store via the register operand passed to
  // impl_helper (EBX_num/EDX_num select the /3 vs /2 opcode extension).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
920 938
// Emit (cbuf != NULL), print (cbuf == NULL, !do_size), or size (do_size) a
// spill copy between any combination of integer registers, x87 registers,
// XMM registers and stack slots.  Dispatches on the register classes of the
// low words, handling the high words (for 64-bit values) inline or in the
// "second bits" section at the end.  The order of the checks is significant:
// each case returns or falls through deliberately.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the push/pop below does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (opcode + modrm + SIB + disp) followed by the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // No matching case: abort.  NOTE(review): presumably Unimplemented() does
  // not return, so the missing return value here is unreachable — confirm.
  Unimplemented();
}
1127 1145
#ifndef PRODUCT
// Print-only mode of implementation(): no buffer, no sizing.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size-only mode: returns the byte count implementation() would emit.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1141 1159
1142 1160 //=============================================================================
1143 1161 #ifndef PRODUCT
1144 1162 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
1145 1163 st->print("NOP \t# %d bytes pad for loops and calls", _count);
1146 1164 }
1147 1165 #endif
1148 1166
1149 1167 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
1150 1168 MacroAssembler _masm(&cbuf);
1151 1169 __ nop(_count);
1152 1170 }
1153 1171
1154 1172 uint MachNopNode::size(PhaseRegAlloc *) const {
1155 1173 return _count;
1156 1174 }
1157 1175
1158 1176
1159 1177 //=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the stack address of the lock box.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset].  The 8-bit displacement form is used when the
// offset fits in a signed byte, the 32-bit form otherwise.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] (32-bit displacement)
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] (8-bit displacement)
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with emit(): 7 bytes for the 32-bit displacement
// form, 4 bytes for the 8-bit form.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}
1194 1212
1195 1213 //=============================================================================
1196 1214
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark(); // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpretor
// (5-byte movl + 5-byte jmp emitted above)
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpretor
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1231 1249
1232 1250 //=============================================================================
#ifndef PRODUCT
// Print the inline-cache check performed at the unverified entry point.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Emit the unverified entry point: compare the cached klass (EAX) against
// the receiver's klass (loaded from ECX), jump to the IC-miss stub on
// mismatch, then pad with NOPs so the verified entry point that follows is
// patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size checked by the assert in emit(): one NOP fewer when
// OptoBreakpoint reserves the byte for an int3.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
1265 1283
1266 1284
1267 1285 //=============================================================================
// Size reserved for the exception handler stub.
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}

// Emit exception handler code. Stuff framesize into a register
// and call a VM stub routine.  Returns the handler's offset within the
// stub section, or 0 if the code buffer could not be expanded.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1293 1311
// Size reserved for the deopt handler stub: a 5-byte pushl of the handler's
// own address followed by a jump.
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1302 1320
1303 1321 // Emit deopt handler code.
1304 1322 int emit_deopt_handler(CodeBuffer& cbuf) {
1305 1323
1306 1324 // Note that the code buffer's insts_mark is always relative to insts.
1307 1325 // That's why we must use the macroassembler to generate a handler.
1308 1326 MacroAssembler _masm(&cbuf);
1309 1327 address base =
1310 1328 __ start_a_stub(size_exception_handler());
1311 1329 if (base == NULL) return 0; // CodeBuffer::expand failed
1312 1330 int offset = __ offset();
↓ open down ↓ |
793 lines elided |
↑ open up ↑ |
1313 1331 InternalAddress here(__ pc());
1314 1332 __ pushptr(here.addr());
1315 1333
1316 1334 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1317 1335 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1318 1336 __ end_a_stub();
1319 1337 return offset;
1320 1338 }
1321 1339
1322 1340
1323 -static void emit_double_constant(CodeBuffer& cbuf, double x) {
1324 - int mark = cbuf.insts()->mark_off();
1325 - MacroAssembler _masm(&cbuf);
1326 - address double_address = __ double_constant(x);
1327 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1328 - emit_d32_reloc(cbuf,
1329 - (int)double_address,
1330 - internal_word_Relocation::spec(double_address),
1331 - RELOC_DISP32);
1332 -}
1333 -
1334 -static void emit_float_constant(CodeBuffer& cbuf, float x) {
1335 - int mark = cbuf.insts()->mark_off();
1336 - MacroAssembler _masm(&cbuf);
1337 - address float_address = __ float_constant(x);
1338 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1339 - emit_d32_reloc(cbuf,
1340 - (int)float_address,
1341 - internal_word_Relocation::spec(float_address),
1342 - RELOC_DISP32);
1343 -}
1344 -
1345 -
1346 1341 const bool Matcher::match_rule_supported(int opcode) {
1347 1342 if (!has_match_rule(opcode))
1348 1343 return false;
1349 1344
1350 1345 return true; // Per default match rules are supported.
1351 1346 }
1352 1347
1353 1348 int Matcher::regnum_to_fpu_offset(int regnum) {
1354 1349 return regnum - 32; // The FP registers are in the second chunk
1355 1350 }
1356 1351
1357 -bool is_positive_zero_float(jfloat f) {
1358 - return jint_cast(f) == jint_cast(0.0F);
1359 -}
1360 -
1361 -bool is_positive_one_float(jfloat f) {
1362 - return jint_cast(f) == jint_cast(1.0F);
1363 -}
1364 -
1365 -bool is_positive_zero_double(jdouble d) {
1366 - return jlong_cast(d) == jlong_cast(0.0);
1367 -}
1368 -
1369 -bool is_positive_one_double(jdouble d) {
1370 - return jlong_cast(d) == jlong_cast(1.0);
1371 -}
1372 -
// True just means we have fast l2f conversion.
// NOTE(review): the "This is UltraSparc specific" wording in the original
// comment was inherited from the SPARC port; on x86 the conversion is
// likewise supported, hence true.
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes: 8 when SSE2 is available, else no vector support.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}

// Vector ideal reg: vectors are carried in double registers.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int offset) {
  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// On x86_32 a 64-bit constant store is never cheaper than two 32-bit stores.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1404 1383
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Narrow oops exist only on 64-bit compressed-oop platforms; calling this
// on x86_32 is a programming error.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1433 1412
1434 1413
// Rewrite the memory operand of 'node' (the operand that input edge 'idx'
// feeds) into its *_win95_safeOper variant.  On Win95/98/ME a fault in the
// EBP-relative addressing forms is not reported as an implicit null check,
// so operands that could use EBP as the base are replaced with equivalents
// that avoid it; operands that can never use EBP are left untouched.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operand list until the operand covering input 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1484 1463
1485 1464 // Advertise here if the CPU requires explicit rounding operations
1486 1465 // to implement the UseStrictFP mode.
1487 1466 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1488 1467 
1489 1468 // Are floats converted to double when stored to stack during deoptimization?
1490 1469 // On x32 it is stored with conversion only when the x87 FPU is used for floats (UseSSE == 0).
1491 1470 bool Matcher::float_in_double() { return (UseSSE == 0); }
1492 1471 
1493 1472 // Do ints take an entire long register or just half?
1494 1473 const bool Matcher::int_in_long = false;
1495 1474
1496 1475 // Return whether or not this register is ever used as an argument. This
1497 1476 // function is used on startup to build the trampoline stubs in generateOptoStub.
1498 1477 // Registers not mentioned will be killed by the VM call in the trampoline, and
1499 1478 // arguments in those registers will not be available to the callee.
1500 1479 bool Matcher::can_be_java_arg( int reg ) {
1501 1480 if( reg == ECX_num || reg == EDX_num ) return true; // integer argument registers
1502 1481 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; // float args need SSE
1503 1482 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; // double halves need SSE2
1504 1483 return false;
1505 1484 }
1506 1485
1507 1486 bool Matcher::is_spillable_arg( int reg ) {
1508 1487 return can_be_java_arg(reg); // exactly the Java argument registers are spillable args
1509 1488 }
1510 1489
1511 1490 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512 1491 // Use the hardware integer DIV instruction when
1513 1492 // it is faster than code which uses multiply.
1514 1493 // Only when the constant divisor fits into 32 bits
1515 1494 // (min_jint is excluded to get only correct
1516 1495 // positive 32 bit values from negative).
1517 1496 return VM_Version::has_fast_idiv() &&
1518 1497 (divisor == (int)divisor && divisor != min_jint);
1519 1498 }
1520 1499
1521 1500 // Register for DIVI projection of divmodI
1522 1501 RegMask Matcher::divI_proj_mask() {
1523 1502 return EAX_REG_mask; // x86 idiv leaves the quotient in EAX
1524 1503 }
1525 1504 
1526 1505 // Register for MODI projection of divmodI
1527 1506 RegMask Matcher::modI_proj_mask() {
1528 1507 return EDX_REG_mask; // x86 idiv leaves the remainder in EDX
1529 1508 }
1530 1509 
1531 1510 // Register for DIVL projection of divmodL
1532 1511 RegMask Matcher::divL_proj_mask() {
1533 1512 ShouldNotReachHere(); // no single-instruction 64-bit div/mod on 32-bit x86
1534 1513 return RegMask();
1535 1514 }
1536 1515 
1537 1516 // Register for MODL projection of divmodL
1538 1517 RegMask Matcher::modL_proj_mask() {
1539 1518 ShouldNotReachHere(); // no single-instruction 64-bit div/mod on 32-bit x86
1540 1519 return RegMask();
1541 1520 }
1542 1521 
1543 1522 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1544 1523 return EBP_REG_mask; // EBP carries the saved SP across method-handle invokes (see preserve_SP/restore_SP)
1545 1524 }
1546 1525
1547 1526 // Returns true if the high 32 bits of the value is known to be zero.
1548 1527 bool is_operand_hi32_zero(Node* n) {
1549 1528 int opc = n->Opcode();
1550 1529 if (opc == Op_LoadUI2L) { // unsigned-int-to-long load zero-extends
1551 1530 return true;
1552 1531 }
1553 1532 if (opc == Op_AndL) { // AND with a constant whose high word is zero clears the high word
1554 1533 Node* o2 = n->in(2);
1555 1534 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1556 1535 return true;
1557 1536 }
1558 1537 }
1559 1538 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { // constant with zero high word
1560 1539 return true;
1561 1540 }
1562 1541 return false; // conservatively assume the high word may be non-zero
1563 1542 }
1564 1543
1565 1544 %}
1566 1545
1567 1546 //----------ENCODING BLOCK-----------------------------------------------------
1568 1547 // This block specifies the encoding classes used by the compiler to output
1569 1548 // byte streams. Encoding classes generate functions which are called by
1570 1549 // Machine Instruction Nodes in order to generate the bit encoding of the
1571 1550 // instruction. Operands specify their base encoding interface with the
1572 1551 // interface keyword. Four interfaces are currently supported:
1573 1552 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1574 1553 // operand to generate a function which returns its register number when
1575 1554 // queried. CONST_INTER causes an operand to generate a function which
1576 1555 // returns the value of the constant when queried. MEMORY_INTER causes an
1577 1556 // operand to generate four functions which return the Base Register, the
1578 1557 // Index Register, the Scale Value, and the Offset Value of the operand when
1579 1558 // queried. COND_INTER causes an operand to generate six functions which
1580 1559 // return the encoding code (ie - encoding bits for the instruction)
1581 1560 // associated with each basic boolean condition for a conditional instruction.
1582 1561 // Instructions specify two basic values for encoding. They use the
1583 1562 // ins_encode keyword to specify their encoding class (which must be one of
1584 1563 // the class names specified in the encoding block), and they use the
1585 1564 // opcode keyword to specify, in order, their primary, secondary, and
1586 1565 // tertiary opcode. Only the opcode sections which a particular instruction
1587 1566 // needs for encoding need to be specified.
1588 1567 encode %{
1589 1568 // Build emit functions for each basic byte or larger field in the intel
1590 1569 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1591 1570 // code in the enc_class source block. Emit functions will live in the
1592 1571 // main source block for now. In future, we can generalize this by
1593 1572 // adding a syntax that specifies the sizes of fields in an order,
1594 1573 // so that the adlc can build the emit functions automagically
1595 1574
1596 1575 // Emit primary opcode
1597 1576 enc_class OpcP %{
1598 1577 emit_opcode(cbuf, $primary);
1599 1578 %}
1600 1579
1601 1580 // Emit secondary opcode
1602 1581 enc_class OpcS %{
1603 1582 emit_opcode(cbuf, $secondary);
1604 1583 %}
1605 1584
1606 1585 // Emit opcode directly
1607 1586 enc_class Opcode(immI d8) %{
1608 1587 emit_opcode(cbuf, $d8$$constant);
1609 1588 %}
1610 1589
1611 1590 enc_class SizePrefix %{
1612 1591 emit_opcode(cbuf,0x66);
1613 1592 %}
1614 1593
1615 1594 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
1616 1595 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1617 1596 %}
1618 1597
1619 1598 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
1620 1599 emit_opcode(cbuf,$opcode$$constant);
1621 1600 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1622 1601 %}
1623 1602
1624 1603 enc_class mov_r32_imm0( eRegI dst ) %{
1625 1604 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1626 1605 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
1627 1606 %}
1628 1607
1629 1608 enc_class cdq_enc %{
1630 1609 // Full implementation of Java idiv and irem; checks for
1631 1610 // special case as described in JVM spec., p.243 & p.271.
1632 1611 //
1633 1612 // normal case special case
1634 1613 //
1635 1614 // input : rax,: dividend min_int
1636 1615 // reg: divisor -1
1637 1616 //
1638 1617 // output: rax,: quotient (= rax, idiv reg) min_int
1639 1618 // rdx: remainder (= rax, irem reg) 0
1640 1619 //
1641 1620 // Code sequence:
1642 1621 //
1643 1622 // 81 F8 00 00 00 80 cmp rax,80000000h
1644 1623 // 0F 85 0B 00 00 00 jne normal_case
1645 1624 // 33 D2 xor rdx,edx
1646 1625 // 83 F9 FF cmp rcx,-1 (0FFh sign-extended from imm8)
1647 1626 // 0F 84 03 00 00 00 je done
1648 1627 // normal_case:
1649 1628 // 99 cdq
1650 1629 // F7 F9 idiv rax,ecx
1651 1630 // done:
1652 1631 //
1653 1632 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1654 1633 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1655 1634 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
1656 1635 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1657 1636 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1658 1637 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
1659 1638 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
1660 1639 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1 (imm8 sign-extended)
1661 1640 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1662 1641 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1663 1642 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
1664 1643 // normal_case:
1665 1644 emit_opcode(cbuf,0x99); // cdq
1666 1645 // idiv (note: must be emitted by the user of this rule)
1667 1646 // normal:
1668 1647 %}
1669 1648
1670 1649 // Dense encoding for older common ops
1671 1650 enc_class Opc_plus(immI opcode, eRegI reg) %{
1672 1651 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1673 1652 %}
1674 1653
1675 1654
1676 1655 // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1677 1656 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1678 1657 // Check for 8-bit immediate, and set sign extend bit in opcode
1679 1658 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1680 1659 emit_opcode(cbuf, $primary | 0x02); // 0x02 selects the sign-extended imm8 form
1681 1660 }
1682 1661 else { // If 32-bit immediate
1683 1662 emit_opcode(cbuf, $primary);
1684 1663 }
1685 1664 %}
1686 1665
1687 1666 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
1688 1667 // Emit primary opcode and set sign-extend bit
1689 1668 // Check for 8-bit immediate, and set sign extend bit in opcode
1690 1669 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1691 1670 emit_opcode(cbuf, $primary | 0x02); }
1692 1671 else { // If 32-bit immediate
1693 1672 emit_opcode(cbuf, $primary);
1694 1673 }
1695 1674 // Emit r/m byte with secondary opcode, after primary opcode.
1696 1675 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1697 1676 %}
1698 1677
1699 1678 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1700 1679 // Check for 8-bit immediate, and set sign extend bit in opcode
1701 1680 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1702 1681 $$$emit8$imm$$constant;
1703 1682 }
1704 1683 else { // If 32-bit immediate
1705 1684 // Output immediate
1706 1685 $$$emit32$imm$$constant;
1707 1686 }
1708 1687 %}
1709 1688
1710 1689 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1711 1690 // Emit primary opcode and set sign-extend bit
1712 1691 // Check for 8-bit immediate, and set sign extend bit in opcode
1713 1692 int con = (int)$imm$$constant; // Throw away top bits
1714 1693 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1715 1694 // Emit r/m byte with secondary opcode, after primary opcode.
1716 1695 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1717 1696 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1718 1697 else emit_d32(cbuf,con);
1719 1698 %}
1720 1699
1721 1700 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1722 1701 // Emit primary opcode and set sign-extend bit
1723 1702 // Check for 8-bit immediate, and set sign extend bit in opcode
1724 1703 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1725 1704 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1726 1705 // Emit r/m byte with tertiary opcode, after primary opcode.
1727 1706 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1728 1707 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1729 1708 else emit_d32(cbuf,con);
1730 1709 %}
1731 1710
1732 1711 enc_class Lbl (label labl) %{ // JMP, CALL
1733 1712 Label *l = $labl$$label;
1734 1713 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1735 1714 %}
1736 1715
1737 1716 enc_class LblShort (label labl) %{ // JMP, CALL
1738 1717 Label *l = $labl$$label;
1739 1718 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1740 1719 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1741 1720 emit_d8(cbuf, disp);
1742 1721 %}
1743 1722
1744 1723 enc_class OpcSReg (eRegI dst) %{ // BSWAP
1745 1724 emit_cc(cbuf, $secondary, $dst$$reg );
1746 1725 %}
1747 1726
1748 1727 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1749 1728 int destlo = $dst$$reg;
1750 1729 int desthi = HIGH_FROM_LOW(destlo);
1751 1730 // bswap lo
1752 1731 emit_opcode(cbuf, 0x0F);
1753 1732 emit_cc(cbuf, 0xC8, destlo);
1754 1733 // bswap hi
1755 1734 emit_opcode(cbuf, 0x0F);
1756 1735 emit_cc(cbuf, 0xC8, desthi);
1757 1736 // xchg lo and hi
1758 1737 emit_opcode(cbuf, 0x87);
1759 1738 emit_rm(cbuf, 0x3, destlo, desthi);
1760 1739 %}
1761 1740
1762 1741 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1763 1742 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1764 1743 %}
1765 1744
1766 1745 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
1767 1746 Label *l = $labl$$label;
1768 1747 $$$emit8$primary;
1769 1748 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1770 1749 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1771 1750 %}
1772 1751
1773 1752 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
1774 1753 Label *l = $labl$$label;
1775 1754 emit_cc(cbuf, $primary, $cop$$cmpcode);
1776 1755 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1777 1756 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1778 1757 emit_d8(cbuf, disp);
1779 1758 %}
1780 1759
1781 1760 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782 1761 $$$emit8$primary;
1783 1762 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784 1763 %}
1785 1764
1786 1765 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1787 1766 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788 1767 emit_d8(cbuf, op >> 8 );
1789 1768 emit_d8(cbuf, op & 255);
1790 1769 %}
1791 1770
1792 1771 // emulate a CMOV with a conditional branch around a MOV
1793 1772 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794 1773 // Invert sense of branch from sense of CMOV
1795 1774 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796 1775 emit_d8( cbuf, $brOffs$$constant );
1797 1776 %}
1798 1777
1799 1778 enc_class enc_PartialSubtypeCheck( ) %{
1800 1779 Register Redi = as_Register(EDI_enc); // result register
1801 1780 Register Reax = as_Register(EAX_enc); // super class
1802 1781 Register Recx = as_Register(ECX_enc); // killed
1803 1782 Register Resi = as_Register(ESI_enc); // sub class
1804 1783 Label miss;
1805 1784
1806 1785 MacroAssembler _masm(&cbuf);
1807 1786 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808 1787 NULL, &miss,
1809 1788 /*set_cond_codes:*/ true);
1810 1789 if ($primary) {
1811 1790 __ xorptr(Redi, Redi);
1812 1791 }
1813 1792 __ bind(miss);
1814 1793 %}
1815 1794
1816 1795 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1817 1796 MacroAssembler masm(&cbuf);
1818 1797 int start = masm.offset();
1819 1798 if (UseSSE >= 2) {
1820 1799 if (VerifyFPU) {
1821 1800 masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822 1801 }
1823 1802 } else {
1824 1803 // External c_calling_convention expects the FPU stack to be 'clean'.
1825 1804 // Compiled code leaves it dirty. Do cleanup now.
1826 1805 masm.empty_FPU_stack();
1827 1806 }
1828 1807 if (sizeof_FFree_Float_Stack_All == -1) {
1829 1808 sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830 1809 } else {
1831 1810 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832 1811 }
1833 1812 %}
1834 1813
1835 1814 enc_class Verify_FPU_For_Leaf %{
1836 1815 if( VerifyFPU ) {
1837 1816 MacroAssembler masm(&cbuf);
1838 1817 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839 1818 }
1840 1819 %}
1841 1820
1842 1821 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843 1822 // This is the instruction starting address for relocation info.
1844 1823 cbuf.set_insts_mark();
1845 1824 $$$emit8$primary;
1846 1825 // CALL directly to the runtime
1847 1826 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848 1827 runtime_call_Relocation::spec(), RELOC_IMM32 );
1849 1828
1850 1829 if (UseSSE >= 2) {
1851 1830 MacroAssembler _masm(&cbuf);
1852 1831 BasicType rt = tf()->return_type();
1853 1832
1854 1833 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855 1834 // A C runtime call where the return value is unused. In SSE2+
1856 1835 // mode the result needs to be removed from the FPU stack. It's
1857 1836 // likely that this function call could be removed by the
1858 1837 // optimizer if the C function is a pure function.
1859 1838 __ ffree(0);
1860 1839 } else if (rt == T_FLOAT) {
1861 1840 __ lea(rsp, Address(rsp, -4));
1862 1841 __ fstp_s(Address(rsp, 0));
1863 1842 __ movflt(xmm0, Address(rsp, 0));
1864 1843 __ lea(rsp, Address(rsp, 4));
1865 1844 } else if (rt == T_DOUBLE) {
1866 1845 __ lea(rsp, Address(rsp, -8));
1867 1846 __ fstp_d(Address(rsp, 0));
1868 1847 __ movdbl(xmm0, Address(rsp, 0));
1869 1848 __ lea(rsp, Address(rsp, 8));
1870 1849 }
1871 1850 }
1872 1851 %}
1873 1852
1874 1853
1875 1854 enc_class pre_call_FPU %{
1876 1855 // If method sets FPU control word restore it here
1877 1856 debug_only(int off0 = cbuf.insts_size());
1878 1857 if( Compile::current()->in_24_bit_fp_mode() ) {
1879 1858 MacroAssembler masm(&cbuf);
1880 1859 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881 1860 }
1882 1861 debug_only(int off1 = cbuf.insts_size());
1883 1862 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
1884 1863 %}
1885 1864
1886 1865 enc_class post_call_FPU %{
1887 1866 // If method sets FPU control word do it here also
1888 1867 if( Compile::current()->in_24_bit_fp_mode() ) {
1889 1868 MacroAssembler masm(&cbuf);
1890 1869 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891 1870 }
1892 1871 %}
1893 1872
1894 1873 enc_class preserve_SP %{
1895 1874 debug_only(int off0 = cbuf.insts_size());
1896 1875 MacroAssembler _masm(&cbuf);
1897 1876 // RBP is preserved across all calls, even compiled calls.
1898 1877 // Use it to preserve RSP in places where the callee might change the SP.
1899 1878 __ movptr(rbp_mh_SP_save, rsp);
1900 1879 debug_only(int off1 = cbuf.insts_size());
1901 1880 assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
1902 1881 %}
1903 1882
1904 1883 enc_class restore_SP %{
1905 1884 MacroAssembler _masm(&cbuf);
1906 1885 __ movptr(rsp, rbp_mh_SP_save);
1907 1886 %}
1908 1887
1909 1888 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
1910 1889 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1911 1890 // who we intended to call.
1912 1891 cbuf.set_insts_mark();
1913 1892 $$$emit8$primary;
1914 1893 if ( !_method ) {
1915 1894 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916 1895 runtime_call_Relocation::spec(), RELOC_IMM32 );
1917 1896 } else if(_optimized_virtual) {
1918 1897 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919 1898 opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1920 1899 } else {
1921 1900 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1922 1901 static_call_Relocation::spec(), RELOC_IMM32 );
1923 1902 }
1924 1903 if( _method ) { // Emit stub for static call
1925 1904 emit_java_to_interp(cbuf);
1926 1905 }
1927 1906 %}
1928 1907
1929 1908 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
1930 1909 // !!!!!
1931 1910 // Generate "MOV EAX,non_oop_word", a placeholder instruction to load oop-info;
1932 1911 // NOTE(review): presumably patched with the real cached oop when the inline cache is resolved — see oop_Relocation::spec_for_immediate
1933 1912 cbuf.set_insts_mark();
1934 1913 emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1
1935 1914 emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
1936 1915 address virtual_call_oop_addr = cbuf.insts_mark();
1937 1916 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1938 1917 // who we intended to call.
1939 1918 cbuf.set_insts_mark();
1940 1919 $$$emit8$primary;
1941 1920 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1942 1921 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1943 1922 %}
1944 1923
1945 1924 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1946 1925 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1947 1926 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1948 1927
1949 1928 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1950 1929 cbuf.set_insts_mark();
1951 1930 $$$emit8$primary;
1952 1931 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1953 1932 emit_d8(cbuf, disp); // Displacement
1954 1933
1955 1934 %}
1956 1935
1957 1936 enc_class Xor_Reg (eRegI dst) %{
1958 1937 emit_opcode(cbuf, 0x33);
1959 1938 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1960 1939 %}
1961 1940
1962 1941 // Following encoding is no longer used, but may be restored if calling
1963 1942 // convention changes significantly.
1964 1943 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1965 1944 //
1966 1945 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1967 1946 // // int ic_reg = Matcher::inline_cache_reg();
1968 1947 // // int ic_encode = Matcher::_regEncode[ic_reg];
1969 1948 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1970 1949 // // int imo_encode = Matcher::_regEncode[imo_reg];
1971 1950 //
1972 1951 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1973 1952 // // // so we load it immediately before the call
1974 1953 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1975 1954 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1976 1955 //
1977 1956 // // xor rbp,ebp
1978 1957 // emit_opcode(cbuf, 0x33);
1979 1958 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1980 1959 //
1981 1960 // // CALL to interpreter.
1982 1961 // cbuf.set_insts_mark();
1983 1962 // $$$emit8$primary;
1984 1963 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1985 1964 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1986 1965 // %}
1987 1966
1988 1967 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1989 1968 $$$emit8$primary;
1990 1969 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1991 1970 $$$emit8$shift$$constant;
1992 1971 %}
1993 1972
1994 1973 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
1995 1974 // Load immediate does not have a zero or sign extended version
1996 1975 // for 8-bit immediates
1997 1976 emit_opcode(cbuf, 0xB8 + $dst$$reg);
1998 1977 $$$emit32$src$$constant;
1999 1978 %}
2000 1979
2001 1980 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
2002 1981 // Load immediate does not have a zero or sign extended version
2003 1982 // for 8-bit immediates
2004 1983 emit_opcode(cbuf, $primary + $dst$$reg);
2005 1984 $$$emit32$src$$constant;
2006 1985 %}
2007 1986
2008 1987 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
2009 1988 // Load immediate does not have a zero or sign extended version
2010 1989 // for 8-bit immediates
2011 1990 int dst_enc = $dst$$reg;
2012 1991 int src_con = $src$$constant & 0x0FFFFFFFFL;
2013 1992 if (src_con == 0) {
2014 1993 // xor dst, dst
2015 1994 emit_opcode(cbuf, 0x33);
2016 1995 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2017 1996 } else {
2018 1997 emit_opcode(cbuf, $primary + dst_enc);
2019 1998 emit_d32(cbuf, src_con);
2020 1999 }
2021 2000 %}
2022 2001
2023 2002 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2024 2003 // Load immediate does not have a zero or sign extended version
2025 2004 // for 8-bit immediates
2026 2005 int dst_enc = $dst$$reg + 2;
2027 2006 int src_con = ((julong)($src$$constant)) >> 32;
2028 2007 if (src_con == 0) {
↓ open down ↓ |
646 lines elided |
↑ open up ↑ |
2029 2008 // xor dst, dst
2030 2009 emit_opcode(cbuf, 0x33);
2031 2010 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2032 2011 } else {
2033 2012 emit_opcode(cbuf, $primary + dst_enc);
2034 2013 emit_d32(cbuf, src_con);
2035 2014 }
2036 2015 %}
2037 2016
2038 2017
2039 - enc_class LdImmD (immD src) %{ // Load Immediate
2040 - if( is_positive_zero_double($src$$constant)) {
2041 - // FLDZ
2042 - emit_opcode(cbuf,0xD9);
2043 - emit_opcode(cbuf,0xEE);
2044 - } else if( is_positive_one_double($src$$constant)) {
2045 - // FLD1
2046 - emit_opcode(cbuf,0xD9);
2047 - emit_opcode(cbuf,0xE8);
2048 - } else {
2049 - emit_opcode(cbuf,0xDD);
2050 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2051 - emit_double_constant(cbuf, $src$$constant);
2052 - }
2053 - %}
2054 -
2055 -
2056 - enc_class LdImmF (immF src) %{ // Load Immediate
2057 - if( is_positive_zero_float($src$$constant)) {
2058 - emit_opcode(cbuf,0xD9);
2059 - emit_opcode(cbuf,0xEE);
2060 - } else if( is_positive_one_float($src$$constant)) {
2061 - emit_opcode(cbuf,0xD9);
2062 - emit_opcode(cbuf,0xE8);
2063 - } else {
2064 - $$$emit8$primary;
2065 - // Load immediate does not have a zero or sign extended version
2066 - // for 8-bit immediates
2067 - // First load to TOS, then move to dst
2068 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2069 - emit_float_constant(cbuf, $src$$constant);
2070 - }
2071 - %}
2072 -
2073 - enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
2074 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2075 - emit_float_constant(cbuf, $con$$constant);
2076 - %}
2077 -
2078 - enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
2079 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2080 - emit_double_constant(cbuf, $con$$constant);
2081 - %}
2082 -
2083 - enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2084 - // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2085 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2086 - emit_opcode(cbuf, 0x0F);
2087 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2088 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2089 - emit_double_constant(cbuf, $con$$constant);
2090 - %}
2091 -
2092 - enc_class Opc_MemImm_F(immF src) %{
2093 - cbuf.set_insts_mark();
2094 - $$$emit8$primary;
2095 - emit_rm(cbuf, 0x0, $secondary, 0x5);
2096 - emit_float_constant(cbuf, $src$$constant);
2097 - %}
2098 -
2099 -
2100 2018 enc_class MovI2X_reg(regX dst, eRegI src) %{
2101 2019 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2102 2020 emit_opcode(cbuf, 0x0F );
2103 2021 emit_opcode(cbuf, 0x6E );
2104 2022 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2105 2023 %}
2106 2024
2107 2025 enc_class MovX2I_reg(eRegI dst, regX src) %{
2108 2026 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2109 2027 emit_opcode(cbuf, 0x0F );
2110 2028 emit_opcode(cbuf, 0x7E );
2111 2029 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2112 2030 %}
2113 2031
2114 2032 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2115 2033 { // MOVD $dst,$src.lo
2116 2034 emit_opcode(cbuf,0x66);
2117 2035 emit_opcode(cbuf,0x0F);
2118 2036 emit_opcode(cbuf,0x6E);
2119 2037 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2120 2038 }
2121 2039 { // MOVD $tmp,$src.hi
2122 2040 emit_opcode(cbuf,0x66);
2123 2041 emit_opcode(cbuf,0x0F);
2124 2042 emit_opcode(cbuf,0x6E);
2125 2043 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2126 2044 }
2127 2045 { // PUNPCKLDQ $dst,$tmp
2128 2046 emit_opcode(cbuf,0x66);
2129 2047 emit_opcode(cbuf,0x0F);
2130 2048 emit_opcode(cbuf,0x62);
2131 2049 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2132 2050 }
2133 2051 %}
2134 2052
2135 2053 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2136 2054 { // MOVD $dst.lo,$src
2137 2055 emit_opcode(cbuf,0x66);
2138 2056 emit_opcode(cbuf,0x0F);
2139 2057 emit_opcode(cbuf,0x7E);
2140 2058 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2141 2059 }
2142 2060 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2143 2061 emit_opcode(cbuf,0xF2);
2144 2062 emit_opcode(cbuf,0x0F);
2145 2063 emit_opcode(cbuf,0x70);
2146 2064 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2147 2065 emit_d8(cbuf, 0x4E);
2148 2066 }
2149 2067 { // MOVD $dst.hi,$tmp
2150 2068 emit_opcode(cbuf,0x66);
2151 2069 emit_opcode(cbuf,0x0F);
2152 2070 emit_opcode(cbuf,0x7E);
2153 2071 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2154 2072 }
2155 2073 %}
2156 2074
2157 2075
2158 2076 // Encode a reg-reg copy. If it is useless, then empty encoding.
2159 2077 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2160 2078 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2161 2079 %}
2162 2080
2163 2081 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2164 2082 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2165 2083 %}
2166 2084
2167 2085 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
2168 2086 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2169 2087 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2170 2088 %}
2171 2089
2172 2090 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2173 2091 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2174 2092 %}
2175 2093
2176 2094 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2177 2095 $$$emit8$primary;
2178 2096 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2179 2097 %}
2180 2098
2181 2099 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2182 2100 $$$emit8$secondary;
2183 2101 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2184 2102 %}
2185 2103
2186 2104 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2187 2105 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2188 2106 %}
2189 2107
2190 2108 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2191 2109 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2192 2110 %}
2193 2111
2194 2112 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2195 2113 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2196 2114 %}
2197 2115
2198 2116 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2199 2117 // Output immediate
2200 2118 $$$emit32$src$$constant;
2201 2119 %}
2202 2120
2203 2121 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2204 2122 // Output Float immediate bits
2205 2123 jfloat jf = $src$$constant;
2206 2124 int jf_as_bits = jint_cast( jf );
2207 2125 emit_d32(cbuf, jf_as_bits);
2208 2126 %}
2209 2127
2210 2128 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2211 2129 // Output Float immediate bits
2212 2130 jfloat jf = $src$$constant;
2213 2131 int jf_as_bits = jint_cast( jf );
2214 2132 emit_d32(cbuf, jf_as_bits);
2215 2133 %}
2216 2134
2217 2135 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2218 2136 // Output immediate
2219 2137 $$$emit16$src$$constant;
2220 2138 %}
2221 2139
2222 2140 enc_class Con_d32(immI src) %{
2223 2141 emit_d32(cbuf,$src$$constant);
2224 2142 %}
2225 2143
2226 2144 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2227 2145 // Output immediate memory reference
2228 2146 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2229 2147 emit_d32(cbuf, 0x00);
2230 2148 %}
2231 2149
2232 2150 enc_class lock_prefix( ) %{
2233 2151 if( os::is_MP() )
2234 2152 emit_opcode(cbuf,0xF0); // [Lock]
2235 2153 %}
2236 2154
2237 2155 // Cmp-xchg long value.
2238 2156 // Note: we need to swap rbx, and rcx before and after the
2239 2157 // cmpxchg8 instruction because the instruction uses
2240 2158 // rcx as the high order word of the new value to store but
2241 2159 // our register encoding uses rbx,.
2242 2160 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2243 2161
2244 2162 // XCHG rbx,ecx
2245 2163 emit_opcode(cbuf,0x87);
2246 2164 emit_opcode(cbuf,0xD9);
2247 2165 // [Lock]
2248 2166 if( os::is_MP() )
2249 2167 emit_opcode(cbuf,0xF0);
2250 2168 // CMPXCHG8 [Eptr]
2251 2169 emit_opcode(cbuf,0x0F);
2252 2170 emit_opcode(cbuf,0xC7);
2253 2171 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2254 2172 // XCHG rbx,ecx
2255 2173 emit_opcode(cbuf,0x87);
2256 2174 emit_opcode(cbuf,0xD9);
2257 2175 %}
2258 2176
// 32-bit compare-and-exchange at [mem_ptr]: (LOCK) CMPXCHG, 0F B1 /r.
2259 2177 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2260 2178 // [Lock]
2261 2179 if( os::is_MP() )
2262 2180 emit_opcode(cbuf,0xF0);
2263 2181
2264 2182 // CMPXCHG [Eptr]
2265 2183 emit_opcode(cbuf,0x0F);
2266 2184 emit_opcode(cbuf,0xB1);
2267 2185 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2268 2186 %}
2269 2187
// Turn the equal-flag into a boolean in 'res': res = 1 when ZF is set
// (e.g. CMPXCHG succeeded), 0 when not — the JNE (0x75) skips the 5-byte
// "MOV res,1" so a not-equal result leaves the earlier "MOV res,0" in place.
2270 2188 enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2271 2189 int res_encoding = $res$$reg;
2272 2190
2273 2191 // MOV res,0
2274 2192 emit_opcode( cbuf, 0xB8 + res_encoding);
2275 2193 emit_d32( cbuf, 0 );
2276 2194 // JNE,s fail
2277 2195 emit_opcode(cbuf,0x75);
2278 2196 emit_d8(cbuf, 5 );
2279 2197 // MOV res,1
2280 2198 emit_opcode( cbuf, 0xB8 + res_encoding);
2281 2199 emit_d32( cbuf, 1 );
2282 2200 // fail:
2283 2201 %}
2284 2202
// Record the current position so memory-operand relocations can find the
// start of the instruction being emitted.
2285 2203 enc_class set_instruction_start( ) %{
2286 2204 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2287 2205 %}
2288 2206
// Emit the ModRM/SIB/displacement for a register + memory operand pair.
2289 2207 enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
2290 2208 int reg_encoding = $ereg$$reg;
2291 2209 int base = $mem$$base;
2292 2210 int index = $mem$$index;
2293 2211 int scale = $mem$$scale;
2294 2212 int displace = $mem$$disp;
2295 2213 bool disp_is_oop = $mem->disp_is_oop();
2296 2214 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2297 2215 %}
2298 2216
// Same as RegMem but addresses the HIGH word of a long: uses the high
// register of the pair and biases the displacement by 4 bytes.
2299 2217 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
2300 2218 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
2301 2219 int base = $mem$$base;
2302 2220 int index = $mem$$index;
2303 2221 int scale = $mem$$scale;
2304 2222 int displace = $mem$$disp + 4; // Offset is 4 further in memory
2305 2223 assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
2306 2224 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
2307 2225 %}
2308 2226
// Long shift by 1..31: a double-shift (SHLD/SHRD, chosen via $tertiary)
// moves bits across the register-pair boundary, then the remaining half is
// shifted by the same count with the $primary/$secondary opcode.
2309 2227 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2310 2228 int r1, r2;
2311 2229 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2312 2230 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2313 2231 emit_opcode(cbuf,0x0F);
2314 2232 emit_opcode(cbuf,$tertiary);
2315 2233 emit_rm(cbuf, 0x3, r1, r2);
2316 2234 emit_d8(cbuf,$cnt$$constant);
2317 2235 emit_d8(cbuf,$primary);
2318 2236 emit_rm(cbuf, 0x3, $secondary, r1);
2319 2237 emit_d8(cbuf,$cnt$$constant);
2320 2238 %}
2321 2239
// Arithmetic long shift by 32..63: copy hi into lo, shift lo by cnt-32,
// then SAR the high half by 31 to replicate the sign bit.
2322 2240 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2323 2241 emit_opcode( cbuf, 0x8B ); // Move
2324 2242 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2325 2243 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2326 2244 emit_d8(cbuf,$primary);
2327 2245 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2328 2246 emit_d8(cbuf,$cnt$$constant-32);
2329 2247 }
2330 2248 emit_d8(cbuf,$primary);
2331 2249 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2332 2250 emit_d8(cbuf,31);
2333 2251 %}
2334 2252
// Logical long shift by 32..63: move one half into the other (direction
// chosen from $secondary), shift the survivor by cnt-32, clear the rest.
2335 2253 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2336 2254 int r1, r2;
2337 2255 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2338 2256 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2339 2257
2340 2258 emit_opcode( cbuf, 0x8B ); // Move r1,r2
2341 2259 emit_rm(cbuf, 0x3, r1, r2);
2342 2260 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2343 2261 emit_opcode(cbuf,$primary);
2344 2262 emit_rm(cbuf, 0x3, $secondary, r1);
2345 2263 emit_d8(cbuf,$cnt$$constant-32);
2346 2264 }
2347 2265 emit_opcode(cbuf,0x33); // XOR r2,r2
2348 2266 emit_rm(cbuf, 0x3, r2, r2);
2349 2267 %}
2350 2268
2351 2269 // Clone of RegMem but accepts an extra parameter to access each
2352 2270 // half of a double in memory; it never needs relocation info.
2353 2271 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2354 2272 emit_opcode(cbuf,$opcode$$constant);
2355 2273 int reg_encoding = $rm_reg$$reg;
2356 2274 int base = $mem$$base;
2357 2275 int index = $mem$$index;
2358 2276 int scale = $mem$$scale;
2359 2277 int displace = $mem$$disp + $disp_for_half$$constant;
2360 2278 bool disp_is_oop = false;
2361 2279 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2362 2280 %}
2363 2281
2364 2282 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2365 2283 //
2366 2284 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2367 2285 // and it never needs relocation information.
2368 2286 // Frequently used to move data between FPU's Stack Top and memory.
2369 2287 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2370 2288 int rm_byte_opcode = $rm_opcode$$constant;
2371 2289 int base = $mem$$base;
2372 2290 int index = $mem$$index;
2373 2291 int scale = $mem$$scale;
2374 2292 int displace = $mem$$disp;
2375 2293 assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2376 2294 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
2377 2295 %}
2378 2296
// Like RMopc_Mem_no_oop, but the displacement MAY be an oop and then
// carries relocation info (static-global accesses).
2379 2297 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2380 2298 int rm_byte_opcode = $rm_opcode$$constant;
2381 2299 int base = $mem$$base;
2382 2300 int index = $mem$$index;
2383 2301 int scale = $mem$$scale;
2384 2302 int displace = $mem$$disp;
2385 2303 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2386 2304 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
2387 2305 %}
2388 2306
// Operands for LEA dst,[src0+src1]: no index, no scale, immediate disp.
2389 2307 enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
2390 2308 int reg_encoding = $dst$$reg;
2391 2309 int base = $src0$$reg; // 0xFFFFFFFF indicates no base
2392 2310 int index = 0x04; // 0x04 indicates no index
2393 2311 int scale = 0x00; // 0x00 indicates no scale
2394 2312 int displace = $src1$$constant; // 0x00 indicates no displacement
2395 2313 bool disp_is_oop = false;
2396 2314 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2397 2315 %}
2398 2316
// dst = min(dst, src): compare, and conditionally skip the 2-byte MOV.
2399 2317 enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
2400 2318 // Compare dst,src
2401 2319 emit_opcode(cbuf,0x3B);
2402 2320 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2403 2321 // jmp dst < src around move
2404 2322 emit_opcode(cbuf,0x7C);
2405 2323 emit_d8(cbuf,2);
2406 2324 // move dst,src
2407 2325 emit_opcode(cbuf,0x8B);
2408 2326 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2409 2327 %}
2410 2328
// dst = max(dst, src): same shape as min_enc but jumps when dst > src (JG).
2411 2329 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
2412 2330 // Compare dst,src
2413 2331 emit_opcode(cbuf,0x3B);
2414 2332 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2415 2333 // jmp dst > src around move
2416 2334 emit_opcode(cbuf,0x7F);
2417 2335 emit_d8(cbuf,2);
2418 2336 // move dst,src
2419 2337 emit_opcode(cbuf,0x8B);
2420 2338 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2421 2339 %}
2422 2340
2423 2341 enc_class enc_FP_store(memory mem, regD src) %{
2424 2342 // If src is FPR1, we can just FST to store it.
2425 2343 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2426 2344 int reg_encoding = 0x2; // Just store
2427 2345 int base = $mem$$base;
2428 2346 int index = $mem$$index;
2429 2347 int scale = $mem$$scale;
2430 2348 int displace = $mem$$disp;
2431 2349 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2432 2350 if( $src$$reg != FPR1L_enc ) {
2433 2351 reg_encoding = 0x3; // Store & pop
2434 2352 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2435 2353 emit_d8( cbuf, 0xC0-1+$src$$reg );
2436 2354 }
2437 2355 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2438 2356 emit_opcode(cbuf,$primary);
2439 2357 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2440 2358 %}
2441 2359
// Two's-complement negate: F7 /3.
2442 2360 enc_class neg_reg(eRegI dst) %{
2443 2361 // NEG $dst
2444 2362 emit_opcode(cbuf,0xF7);
2445 2363 emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2446 2364 %}
2447 2365
// SETL: set dst byte to 1 if the less-than condition holds (0F 9C).
2448 2366 enc_class setLT_reg(eCXRegI dst) %{
2449 2367 // SETLT $dst
2450 2368 emit_opcode(cbuf,0x0F);
2451 2369 emit_opcode(cbuf,0x9C);
2452 2370 emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2453 2371 %}
2454 2372
// Branchless conditional add: p -= q; tmp = borrow mask (SBB tmp,tmp);
// p += (tmp & y), i.e. add y only when p < q.
2455 2373 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
2456 2374 int tmpReg = $tmp$$reg;
2457 2375
2458 2376 // SUB $p,$q
2459 2377 emit_opcode(cbuf,0x2B);
2460 2378 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2461 2379 // SBB $tmp,$tmp
2462 2380 emit_opcode(cbuf,0x1B);
2463 2381 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2464 2382 // AND $tmp,$y
2465 2383 emit_opcode(cbuf,0x23);
2466 2384 emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2467 2385 // ADD $p,$tmp
2468 2386 emit_opcode(cbuf,0x03);
2469 2387 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2470 2388 %}
2471 2389
// Same branchless conditional add, but the AND mask comes from memory.
2472 2390 enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
2473 2391 int tmpReg = $tmp$$reg;
2474 2392
2475 2393 // SUB $p,$q
2476 2394 emit_opcode(cbuf,0x2B);
2477 2395 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2478 2396 // SBB $tmp,$tmp
2479 2397 emit_opcode(cbuf,0x1B);
2480 2398 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2481 2399 // AND $tmp,$y
2482 2400 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2483 2401 emit_opcode(cbuf,0x23);
2484 2402 int reg_encoding = tmpReg;
2485 2403 int base = $mem$$base;
2486 2404 int index = $mem$$index;
2487 2405 int scale = $mem$$scale;
2488 2406 int displace = $mem$$disp;
2489 2407 bool disp_is_oop = $mem->disp_is_oop();
2490 2408 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2491 2409 // ADD $p,$tmp
2492 2410 emit_opcode(cbuf,0x03);
2493 2411 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2494 2412 %}
2495 2413
// Variable left shift of a long in a register pair: if shift >= 32, move
// lo into hi and clear lo first; then SHLD/SHL by CL handle the remainder.
2496 2414 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2497 2415 // TEST shift,32
2498 2416 emit_opcode(cbuf,0xF7);
2499 2417 emit_rm(cbuf, 0x3, 0, ECX_enc);
2500 2418 emit_d32(cbuf,0x20);
2501 2419 // JEQ,s small
2502 2420 emit_opcode(cbuf, 0x74);
2503 2421 emit_d8(cbuf, 0x04);
2504 2422 // MOV $dst.hi,$dst.lo
2505 2423 emit_opcode( cbuf, 0x8B );
2506 2424 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2507 2425 // CLR $dst.lo
2508 2426 emit_opcode(cbuf, 0x33);
2509 2427 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2510 2428 // small:
2511 2429 // SHLD $dst.hi,$dst.lo,$shift
2512 2430 emit_opcode(cbuf,0x0F);
2513 2431 emit_opcode(cbuf,0xA5);
2514 2432 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2515 2433 // SHL $dst.lo,$shift
2516 2434 emit_opcode(cbuf,0xD3);
2517 2435 emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2518 2436 %}
2519 2437
// Variable logical right shift of a long: mirror image of shift_left_long
// using SHRD/SHR and clearing the high half for shifts >= 32.
2520 2438 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2521 2439 // TEST shift,32
2522 2440 emit_opcode(cbuf,0xF7);
2523 2441 emit_rm(cbuf, 0x3, 0, ECX_enc);
2524 2442 emit_d32(cbuf,0x20);
2525 2443 // JEQ,s small
2526 2444 emit_opcode(cbuf, 0x74);
2527 2445 emit_d8(cbuf, 0x04);
2528 2446 // MOV $dst.lo,$dst.hi
2529 2447 emit_opcode( cbuf, 0x8B );
2530 2448 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2531 2449 // CLR $dst.hi
2532 2450 emit_opcode(cbuf, 0x33);
2533 2451 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2534 2452 // small:
2535 2453 // SHRD $dst.lo,$dst.hi,$shift
2536 2454 emit_opcode(cbuf,0x0F);
2537 2455 emit_opcode(cbuf,0xAD);
2538 2456 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2539 2457 // SHR $dst.hi,$shift
2540 2458 emit_opcode(cbuf,0xD3);
2541 2459 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2542 2460 %}
2543 2461
// Variable arithmetic right shift of a long: like shift_right_long but the
// high half is sign-filled with SAR hi,31 on the >=32 path, and SAR by CL.
2544 2462 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2545 2463 // TEST shift,32
2546 2464 emit_opcode(cbuf,0xF7);
2547 2465 emit_rm(cbuf, 0x3, 0, ECX_enc);
2548 2466 emit_d32(cbuf,0x20);
2549 2467 // JEQ,s small
2550 2468 emit_opcode(cbuf, 0x74);
2551 2469 emit_d8(cbuf, 0x05);
2552 2470 // MOV $dst.lo,$dst.hi
2553 2471 emit_opcode( cbuf, 0x8B );
2554 2472 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2555 2473 // SAR $dst.hi,31
2556 2474 emit_opcode(cbuf, 0xC1);
2557 2475 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2558 2476 emit_d8(cbuf, 0x1F );
2559 2477 // small:
2560 2478 // SHRD $dst.lo,$dst.hi,$shift
2561 2479 emit_opcode(cbuf,0x0F);
2562 2480 emit_opcode(cbuf,0xAD);
2563 2481 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2564 2482 // SAR $dst.hi,$shift
2565 2483 emit_opcode(cbuf,0xD3);
2566 2484 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2567 2485 %}
2568 2486
2569 2487
2570 2488 // ----------------- Encodings for floating point unit -----------------
2571 2489 // May leave result in FPU-TOS or FPU reg depending on opcodes
2572 2490 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
2573 2491 $$$emit8$primary;
2574 2492 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2575 2493 %}
2576 2494
2577 2495 // Pop argument in FPR0 with FSTP ST(0)
2578 2496 enc_class PopFPU() %{
2579 2497 emit_opcode( cbuf, 0xDD );
2580 2498 emit_d8( cbuf, 0xD8 );
2581 2499 %}
2582 2500
2583 2501 // !!!!! equivalent to Pop_Reg_F
2584 2502 enc_class Pop_Reg_D( regD dst ) %{
2585 2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2586 2504 emit_d8( cbuf, 0xD8+$dst$$reg );
2587 2505 %}
2588 2506
// FLD ST(i-1): push a copy of FPU register dst onto the FP stack.
2589 2507 enc_class Push_Reg_D( regD dst ) %{
2590 2508 emit_opcode( cbuf, 0xD9 );
2591 2509 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2592 2510 %}
2593 2511
// Strict-FP scaling: multiply dst by a subnormal-bias constant loaded as an
// 80-bit real from a StubRoutines address.
2594 2512 enc_class strictfp_bias1( regD dst ) %{
2595 2513 emit_opcode( cbuf, 0xDB ); // FLD m80real
2596 2514 emit_opcode( cbuf, 0x2D );
2597 2515 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2598 2516 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2599 2517 emit_opcode( cbuf, 0xC8+$dst$$reg );
2600 2518 %}
2601 2519
// Inverse bias of strictfp_bias1, using the second subnormal-bias constant.
2602 2520 enc_class strictfp_bias2( regD dst ) %{
2603 2521 emit_opcode( cbuf, 0xDB ); // FLD m80real
2604 2522 emit_opcode( cbuf, 0x2D );
2605 2523 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2606 2524 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2607 2525 emit_opcode( cbuf, 0xC8+$dst$$reg );
2608 2526 %}
2609 2527
2610 2528 // Special case for moving an integer register to a stack slot.
2611 2529 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2612 2530 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2613 2531 %}
2614 2532
2615 2533 // Special case for moving a register to a stack slot.
2616 2534 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2617 2535 // Opcode already emitted
2618 2536 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
2619 2537 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
2620 2538 emit_d32(cbuf, $dst$$disp); // Displacement
2621 2539 %}
2622 2540
2623 2541 // Push the integer in stackSlot 'src' onto FP-stack
2624 2542 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2625 2543 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2626 2544 %}
2627 2545
2628 2546 // Push the float in stackSlot 'src' onto FP-stack
2629 2547 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2630 2548 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2631 2549 %}
2632 2550
2633 2551 // Push the double in stackSlot 'src' onto FP-stack
2634 2552 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2635 2553 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2636 2554 %}
2637 2555
2638 2556 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2639 2557 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2640 2558 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2641 2559 %}
2642 2560
2643 2561 // Same as Pop_Mem_F except for opcode
2644 2562 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2645 2563 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2646 2564 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2647 2565 %}
2648 2566
// FSTP ST(i): store TOS into FPU register dst and pop the FP stack.
2649 2567 enc_class Pop_Reg_F( regF dst ) %{
2650 2568 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2651 2569 emit_d8( cbuf, 0xD8+$dst$$reg );
2652 2570 %}
2653 2571
// FLD ST(i-1): push a copy of FPU register dst onto the FP stack.
2654 2572 enc_class Push_Reg_F( regF dst ) %{
2655 2573 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2656 2574 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2657 2575 %}
2658 2576
2659 2577 // Push FPU's float to a stack-slot, and pop FPU-stack
// If src is not already at TOS (FPR1), FLD it first and use the popping
// store (FSTP) so the stack depth is unchanged overall.
2660 2578 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
2661 2579 int pop = 0x02;
2662 2580 if ($src$$reg != FPR1L_enc) {
2663 2581 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2664 2582 emit_d8( cbuf, 0xC0-1+$src$$reg );
2665 2583 pop = 0x03;
2666 2584 }
2667 2585 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2668 2586 %}
2669 2587
2670 2588 // Push FPU's double to a stack-slot, and pop FPU-stack
2671 2589 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
2672 2590 int pop = 0x02;
2673 2591 if ($src$$reg != FPR1L_enc) {
2674 2592 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2675 2593 emit_d8( cbuf, 0xC0-1+$src$$reg );
2676 2594 pop = 0x03;
2677 2595 }
2678 2596 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2679 2597 %}
2680 2598
2681 2599 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2682 2600 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
2683 2601 int pop = 0xD0 - 1; // -1 since we skip FLD
2684 2602 if ($src$$reg != FPR1L_enc) {
2685 2603 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2686 2604 emit_d8( cbuf, 0xC0-1+$src$$reg );
2687 2605 pop = 0xD8;
2688 2606 }
2689 2607 emit_opcode( cbuf, 0xDD );
2690 2608 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2691 2609 %}
2692 2610
2693 2611
// dst = src + src1*src2 on the x87 stack, assembled via MacroAssembler.
2694 2612 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
2695 2613 MacroAssembler masm(&cbuf);
2696 2614 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2697 2615 masm.fmul( $src2$$reg+0); // value at TOS
2698 2616 masm.fadd( $src$$reg+0); // value at TOS
2699 2617 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
2700 2618 %}
2701 2619
2702 2620
// Load dst into FPR0 and, if src is not FPR1, rotate it into FPR1 with the
// fincstp / FXCH / fdecstp trick so a following fprem sees dst over src.
2703 2621 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2704 2622 // load dst in FPR0
2705 2623 emit_opcode( cbuf, 0xD9 );
2706 2624 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2707 2625 if ($src$$reg != FPR1L_enc) {
2708 2626 // fincstp
2709 2627 emit_opcode (cbuf, 0xD9);
2710 2628 emit_opcode (cbuf, 0xF7);
2711 2629 // swap src with FPR1:
2712 2630 // FXCH FPR1 with src
2713 2631 emit_opcode(cbuf, 0xD9);
2714 2632 emit_d8(cbuf, 0xC8-1+$src$$reg );
2715 2633 // fdecstp
2716 2634 emit_opcode (cbuf, 0xD9);
2717 2635 emit_opcode (cbuf, 0xF6);
2718 2636 }
2719 2637 %}
2720 2638
// Move two XMM doubles onto the x87 stack (for fprem-style ops) by
// spilling each through an 8-byte scratch slot on the C stack.
2721 2639 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
2722 2640 // Allocate a word
2723 2641 emit_opcode(cbuf,0x83); // SUB ESP,8
2724 2642 emit_opcode(cbuf,0xEC);
2725 2643 emit_d8(cbuf,0x08);
2726 2644
2727 2645 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
2728 2646 emit_opcode (cbuf, 0x0F );
2729 2647 emit_opcode (cbuf, 0x11 );
2730 2648 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2731 2649
2732 2650 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2733 2651 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2734 2652
2735 2653 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
2736 2654 emit_opcode (cbuf, 0x0F );
2737 2655 emit_opcode (cbuf, 0x11 );
2738 2656 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2739 2657
2740 2658 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2741 2659 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2742 2660
2743 2661 %}
2744 2662
// Single-precision variant of Push_ModD_encoding using a 4-byte slot.
2745 2663 enc_class Push_ModX_encoding( regX src0, regX src1) %{
2746 2664 // Allocate a word
2747 2665 emit_opcode(cbuf,0x83); // SUB ESP,4
2748 2666 emit_opcode(cbuf,0xEC);
2749 2667 emit_d8(cbuf,0x04);
2750 2668
2751 2669 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
2752 2670 emit_opcode (cbuf, 0x0F );
2753 2671 emit_opcode (cbuf, 0x11 );
2754 2672 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2755 2673
2756 2674 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2757 2675 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2758 2676
2759 2677 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2760 2678 emit_opcode (cbuf, 0x0F );
2761 2679 emit_opcode (cbuf, 0x11 );
2762 2680 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2763 2681
2764 2682 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2765 2683 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2766 2684
2767 2685 %}
2768 2686
// Move the x87 TOS double into an XMM register through the scratch slot,
// then release the 8 bytes reserved earlier.
2769 2687 enc_class Push_ResultXD(regXD dst) %{
2770 2688 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2771 2689
2772 2690 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2773 2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2774 2692 emit_opcode (cbuf, 0x0F );
2775 2693 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2776 2694 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2777 2695
2778 2696 emit_opcode(cbuf,0x83); // ADD ESP,8
2779 2697 emit_opcode(cbuf,0xC4);
2780 2698 emit_d8(cbuf,0x08);
2781 2699 %}
2782 2700
// Float variant: FSTP_S to the slot, MOVSS into dst, then pop d8 bytes.
2783 2701 enc_class Push_ResultX(regX dst, immI d8) %{
2784 2702 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2785 2703
2786 2704 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2787 2705 emit_opcode (cbuf, 0x0F );
2788 2706 emit_opcode (cbuf, 0x10 );
2789 2707 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2790 2708
2791 2709 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2792 2710 emit_opcode(cbuf,0xC4);
2793 2711 emit_d8(cbuf,$d8$$constant);
2794 2712 %}
2795 2713
// Spill one XMM double to a fresh 8-byte slot and load it onto the x87 stack.
2796 2714 enc_class Push_SrcXD(regXD src) %{
2797 2715 // Allocate a word
2798 2716 emit_opcode(cbuf,0x83); // SUB ESP,8
2799 2717 emit_opcode(cbuf,0xEC);
2800 2718 emit_d8(cbuf,0x08);
2801 2719
2802 2720 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2803 2721 emit_opcode (cbuf, 0x0F );
2804 2722 emit_opcode (cbuf, 0x11 );
2805 2723 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2806 2724
2807 2725 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2808 2726 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2809 2727 %}
2810 2728
// Reserve an 8-byte scratch area on the C stack (SUB ESP,8).
2811 2729 enc_class push_stack_temp_qword() %{
2812 2730 emit_opcode(cbuf,0x83); // SUB ESP,8
2813 2731 emit_opcode(cbuf,0xEC);
2814 2732 emit_d8 (cbuf,0x08);
2815 2733 %}
2816 2734
// Release the 8-byte scratch area (ADD ESP,8).
2817 2735 enc_class pop_stack_temp_qword() %{
2818 2736 emit_opcode(cbuf,0x83); // ADD ESP,8
2819 2737 emit_opcode(cbuf,0xC4);
2820 2738 emit_d8 (cbuf,0x08);
2821 2739 %}
2822 2740
// Copy an XMM double through [ESP] onto the x87 stack (slot assumed
// already allocated, e.g. by push_stack_temp_qword).
2823 2741 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
2824 2742 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
2825 2743 emit_opcode (cbuf, 0x0F );
2826 2744 emit_opcode (cbuf, 0x11 );
2827 2745 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2828 2746
2829 2747 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2830 2748 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2831 2749 %}
2832 2750
2833 2751 // Compute X^Y using Intel's fast hardware instructions, if possible.
2834 2752 // Otherwise return a NaN.
// Splits Q = Y*log2(X) into int(Q) and frac(Q): f2xm1 handles 2^frac(Q),
// while int(Q) is turned into 2^int(Q) by building the double's exponent
// field with integer ops in EAX and multiplying it in.
2835 2753 enc_class pow_exp_core_encoding %{
2836 2754 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2837 2755 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2838 2756 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2839 2757 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2840 2758 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2841 2759 emit_opcode(cbuf,0x1C);
2842 2760 emit_d8(cbuf,0x24);
2843 2761 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2844 2762 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2845 2763 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2846 2764 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2847 2765 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2848 2766 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2849 2767 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2850 2768 emit_d32(cbuf,0xFFFFF800);
2851 2769 emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias
2852 2770 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2853 2771 emit_d32(cbuf,1023);
2854 2772 emit_opcode(cbuf,0x8B); // mov rbx,eax
2855 2773 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2856 2774 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
2857 2775 emit_rm(cbuf,0x3,0x4,EAX_enc);
2858 2776 emit_d8(cbuf,20);
2859 2777 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
2860 2778 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2861 2779 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
2862 2780 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2863 2781 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
2864 2782 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2865 2783 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2866 2784 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2867 2785 emit_d32(cbuf,0);
2868 2786 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2869 2787 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2870 2788 %}
2871 2789
2872 2790 // enc_class Pop_Reg_Mod_D( regD dst, regD src)
2873 2791 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2874 2792
// If src is not FPR1, rotate the fprem result back out of FPR1 using the
// fincstp / FXCH / fdecstp trick; the actual pop is done by a follow-on
// Pop_Reg_F / Pop_Mem_F encoding.
2875 2793 enc_class Push_Result_Mod_D( regD src) %{
2876 2794 if ($src$$reg != FPR1L_enc) {
2877 2795 // fincstp
2878 2796 emit_opcode (cbuf, 0xD9);
2879 2797 emit_opcode (cbuf, 0xF7);
2880 2798 // FXCH FPR1 with src
2881 2799 emit_opcode(cbuf, 0xD9);
2882 2800 emit_d8(cbuf, 0xC8-1+$src$$reg );
2883 2801 // fdecstp
2884 2802 emit_opcode (cbuf, 0xD9);
2885 2803 emit_opcode (cbuf, 0xF6);
2886 2804 }
2887 2805 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2888 2806 // // FSTP FPR$dst$$reg
2889 2807 // emit_opcode( cbuf, 0xDD );
2890 2808 // emit_d8( cbuf, 0xD8+$dst$$reg );
2891 2809 %}
2892 2810
// Copy FPU status into EFLAGS and skip the next 5 bytes when the parity
// flag is clear (i.e. the compare was ordered / no NaN involved).
2893 2811 enc_class fnstsw_sahf_skip_parity() %{
2894 2812 // fnstsw ax
2895 2813 emit_opcode( cbuf, 0xDF );
2896 2814 emit_opcode( cbuf, 0xE0 );
2897 2815 // sahf
2898 2816 emit_opcode( cbuf, 0x9E );
2899 2817 // jnp ::skip
2900 2818 emit_opcode( cbuf, 0x7B );
2901 2819 emit_opcode( cbuf, 0x05 );
2902 2820 %}
2903 2821
// FPREM loop: the instruction may produce only a partial remainder, so
// repeat until the FPU's C2 bit (surfaced via parity after SAHF) clears.
2904 2822 enc_class emitModD() %{
2905 2823 // fprem must be iterative
2906 2824 // :: loop
2907 2825 // fprem
2908 2826 emit_opcode( cbuf, 0xD9 );
2909 2827 emit_opcode( cbuf, 0xF8 );
2910 2828 // wait
2911 2829 emit_opcode( cbuf, 0x9b );
2912 2830 // fnstsw ax
2913 2831 emit_opcode( cbuf, 0xDF );
2914 2832 emit_opcode( cbuf, 0xE0 );
2915 2833 // sahf
2916 2834 emit_opcode( cbuf, 0x9E );
2917 2835 // jp ::loop
2918 2836 emit_opcode( cbuf, 0x0F );
2919 2837 emit_opcode( cbuf, 0x8A );
2920 2838 emit_opcode( cbuf, 0xF4 );
2921 2839 emit_opcode( cbuf, 0xFF );
2922 2840 emit_opcode( cbuf, 0xFF );
2923 2841 emit_opcode( cbuf, 0xFF );
2924 2842 %}
2925 2843
// Materialize FPU compare results into EFLAGS, folding an unordered
// (NaN) outcome into the "less than" case by forcing the carry flag.
2926 2844 enc_class fpu_flags() %{
2927 2845 // fnstsw_ax
2928 2846 emit_opcode( cbuf, 0xDF);
2929 2847 emit_opcode( cbuf, 0xE0);
2930 2848 // test ax,0x0400
2931 2849 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate
2932 2850 emit_opcode( cbuf, 0xA9 );
2933 2851 emit_d16 ( cbuf, 0x0400 );
2934 2852 // // // This sequence works, but stalls for 12-16 cycles on PPro
2935 2853 // // test rax,0x0400
2936 2854 // emit_opcode( cbuf, 0xA9 );
2937 2855 // emit_d32 ( cbuf, 0x00000400 );
2938 2856 //
2939 2857 // jz exit (no unordered comparison)
2940 2858 emit_opcode( cbuf, 0x74 );
2941 2859 emit_d8 ( cbuf, 0x02 );
2942 2860 // mov ah,1 - treat as LT case (set carry flag)
2943 2861 emit_opcode( cbuf, 0xB4 );
2944 2862 emit_d8 ( cbuf, 0x01 );
2945 2863 // sahf
2946 2864 emit_opcode( cbuf, 0x9E);
2947 2865 %}
2948 2866
// P6 fixup after an XMM/FCOMI compare: when the parity flag signals an
// unordered (NaN) result, rewrite EFLAGS to look like "less than".
2949 2867 enc_class cmpF_P6_fixup() %{
2950 2868 // Fixup the integer flags in case comparison involved a NaN
2951 2869 //
2952 2870 // JNP exit (no unordered comparison, P-flag is set by NaN)
2953 2871 emit_opcode( cbuf, 0x7B );
2954 2872 emit_d8 ( cbuf, 0x03 );
2955 2873 // MOV AH,1 - treat as LT case (set carry flag)
2956 2874 emit_opcode( cbuf, 0xB4 );
2957 2875 emit_d8 ( cbuf, 0x01 );
2958 2876 // SAHF
2959 2877 emit_opcode( cbuf, 0x9E);
2960 2878 // NOP // target for branch to avoid branch to branch
2961 2879 emit_opcode( cbuf, 0x90);
2962 2880 %}
2963 2881
2964 2882 // fnstsw_ax();
2965 2883 // sahf();
2966 2884 // movl(dst, nan_result);
2967 2885 // jcc(Assembler::parity, exit);
2968 2886 // movl(dst, less_result);
2969 2887 // jcc(Assembler::below, exit);
2970 2888 // movl(dst, equal_result);
2971 2889 // jcc(Assembler::equal, exit);
2972 2890 // movl(dst, greater_result);
2973 2891
2974 2892 // less_result = 1;
2975 2893 // greater_result = -1;
2976 2894 // equal_result = 0;
2977 2895 // nan_result = -1;
2978 2896
// Convert an x87 compare to a three-valued int in dst:
// -1 (less or unordered/NaN), 0 (equal), +1 (greater); see the
// pseudo-code comments above for the jump structure.
2979 2897 enc_class CmpF_Result(eRegI dst) %{
2980 2898 // fnstsw_ax();
2981 2899 emit_opcode( cbuf, 0xDF);
2982 2900 emit_opcode( cbuf, 0xE0);
2983 2901 // sahf
2984 2902 emit_opcode( cbuf, 0x9E);
2985 2903 // movl(dst, nan_result);
2986 2904 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2987 2905 emit_d32( cbuf, -1 );
2988 2906 // jcc(Assembler::parity, exit);
2989 2907 emit_opcode( cbuf, 0x7A );
2990 2908 emit_d8 ( cbuf, 0x13 );
2991 2909 // movl(dst, less_result);
2992 2910 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2993 2911 emit_d32( cbuf, -1 );
2994 2912 // jcc(Assembler::below, exit);
2995 2913 emit_opcode( cbuf, 0x72 );
2996 2914 emit_d8 ( cbuf, 0x0C );
2997 2915 // movl(dst, equal_result);
2998 2916 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2999 2917 emit_d32( cbuf, 0 );
3000 2918 // jcc(Assembler::equal, exit);
3001 2919 emit_opcode( cbuf, 0x74 );
3002 2920 emit_d8 ( cbuf, 0x05 );
3003 2921 // movl(dst, greater_result);
3004 2922 emit_opcode( cbuf, 0xB8 + $dst$$reg);
3005 2923 emit_d32( cbuf, 1 );
3006 2924 %}
3007 2925
3008 2926
3009 2927 // XMM version of CmpF_Result. Because the XMM compare
3010 2928 // instructions set the EFLAGS directly. It becomes simpler than
3011 2929 // the float version above.
// Assumes dst pre-holds 0; decrement for less/NaN, increment for greater.
3012 2930 enc_class CmpX_Result(eRegI dst) %{
3013 2931 MacroAssembler _masm(&cbuf);
3014 2932 Label nan, inc, done;
3015 2933
3016 2934 __ jccb(Assembler::parity, nan);
3017 2935 __ jccb(Assembler::equal, done);
3018 2936 __ jccb(Assembler::above, inc);
3019 2937 __ bind(nan);
3020 2938 __ decrement(as_Register($dst$$reg)); // NO L qqq
3021 2939 __ jmpb(done);
3022 2940 __ bind(inc);
3023 2941 __ increment(as_Register($dst$$reg)); // NO L qqq
3024 2942 __ bind(done);
3025 2943 %}
3026 2944
3027 2945 // Compare the longs and set flags
3028 2946 // BROKEN! Do Not use as-is
// Broken because a hi-word inequality leaves flags from the hi compare,
// which is not a correct signed/unsigned 64-bit comparison for all cases.
3029 2947 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
3030 2948 // CMP $src1.hi,$src2.hi
3031 2949 emit_opcode( cbuf, 0x3B );
3032 2950 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
3033 2951 // JNE,s done
3034 2952 emit_opcode(cbuf,0x75);
3035 2953 emit_d8(cbuf, 2 );
3036 2954 // CMP $src1.lo,$src2.lo
3037 2955 emit_opcode( cbuf, 0x3B );
3038 2956 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3039 2957 // done:
3040 2958 %}
3041 2959
// Sign-extend an int into a long register pair: copy src to both halves,
// then arithmetic-shift the high half right by 31.
3042 2960 enc_class convert_int_long( regL dst, eRegI src ) %{
3043 2961 // mov $dst.lo,$src
3044 2962 int dst_encoding = $dst$$reg;
3045 2963 int src_encoding = $src$$reg;
3046 2964 encode_Copy( cbuf, dst_encoding , src_encoding );
3047 2965 // mov $dst.hi,$src
3048 2966 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
3049 2967 // sar $dst.hi,31
3050 2968 emit_opcode( cbuf, 0xC1 );
3051 2969 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
3052 2970 emit_d8(cbuf, 0x1F );
3053 2971 %}
3054 2972
// Long -> x87 double: push both halves, FILD the 64-bit value at [SP],
// then restore the stack pointer.
3055 2973 enc_class convert_long_double( eRegL src ) %{
3056 2974 // push $src.hi
3057 2975 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3058 2976 // push $src.lo
3059 2977 emit_opcode(cbuf, 0x50+$src$$reg );
3060 2978 // fild 64-bits at [SP]
3061 2979 emit_opcode(cbuf,0xdf);
3062 2980 emit_d8(cbuf, 0x6C);
3063 2981 emit_d8(cbuf, 0x24);
3064 2982 emit_d8(cbuf, 0x00);
3065 2983 // pop stack
3066 2984 emit_opcode(cbuf, 0x83); // add SP, #8
3067 2985 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3068 2986 emit_d8(cbuf, 0x8);
3069 2987 %}
3070 2988
3071 2989 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
3072 2990 // IMUL EDX:EAX,$src1
3073 2991 emit_opcode( cbuf, 0xF7 );
3074 2992 emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
3075 2993 // SAR EDX,$cnt-32
3076 2994 int shift_count = ((int)$cnt$$constant) - 32;
3077 2995 if (shift_count > 0) {
3078 2996 emit_opcode(cbuf, 0xC1);
3079 2997 emit_rm(cbuf, 0x3, 7, $dst$$reg );
3080 2998 emit_d8(cbuf, shift_count);
3081 2999 }
3082 3000 %}
3083 3001
  // this version doesn't have add sp, 8
  // Same as convert_long_double, but the two pushed words are deliberately
  // left on the stack; the surrounding encoding is responsible for
  // releasing them.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
3096 3014
  // Signed 32x32->64 widening multiply: EDX:EAX = (long)EAX * (long)$src.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}
3103 3021
  // Unsigned 32x32->64 widening multiply: EDX:EAX = (uint)EAX * (uint)$src.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
3110 3028
  // Full 64x64->64 multiply of $dst (EDX:EAX) by $src, clobbering $tmp.
  // Standard schoolbook decomposition: the cross products x_hi*y_lo and
  // x_lo*y_hi contribute only to the high word; their low halves are
  // summed in $tmp and folded into EDX after the widening MUL of the lows.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (fold the cross-product sum into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
3136 3054
  // Multiply $dst (EDX:EAX) by a small constant $src (fits in 8 bits),
  // clobbering $tmp.  Same decomposition as long_multiply but the
  // multiplier's high word is zero, so only one cross product remains.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,#src  (load the constant multiplier into EDX)
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp  (fold the cross product into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3154 3072
  // 64-bit signed divide via a call into the runtime: push src1 then src2
  // (hi word first, so each long lands little-end-down in memory), CALL
  // SharedRuntime::ldiv, then pop the four argument words.
  // NOTE(review): HIGH_FROM_LOW is applied to the whole (0x50+reg) opcode
  // byte, which is only correct if HIGH_FROM_LOW is a pure additive offset
  // -- matches its use elsewhere in this file, but worth confirming.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3173 3091
  // 64-bit signed remainder via a call into the runtime: identical stack
  // protocol to long_div above, but calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3192 3110
  // Test a long against zero: OR the two halves into $tmp so that
  // ZF == 1 iff the full 64-bit value in $src is zero.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3201 3119
  // 64-bit equality compare: CMP the low words, and only if they are equal
  // CMP the high words, so ZF reflects full 64-bit (in)equality.
  // NOTE(review): ordering flags (SF/OF/CF) are taken from whichever CMP
  // ran last and are not meaningful for a signed long compare -- only
  // EQ/NE consumers should use this encoding.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3213 3131
  // Signed 64-bit compare: compute src1 - src2 as CMP(lo) then SBB(hi),
  // discarding the difference but leaving flags set as for a full
  // 64-bit subtraction.  $tmp is clobbered to hold the high-word borrow.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3225 3143
  // Compare a long against zero with ordering: compute 0 - $src via
  // CMP(lo)/SBB(hi) through a zeroed $tmp, leaving flags as for the
  // 64-bit subtraction (the difference itself is discarded).
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3237 3155
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two-word negate: NEG hi; NEG lo; SBB hi,0.  Negating both halves and
  // then subtracting the borrow from NEG lo out of the high word yields
  // the correct 64-bit two's-complement negation.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3248 3166
  // Load 64 bits from memory into the low quadword of an XMM register.
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3253 3171
  // Store the low quadword of an XMM register to memory.
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3258 3176
  // Replicate the low byte of $src across the low eight bytes of $dst:
  // copy src->dst, PUNPCKLBW dst,dst doubles each of the low bytes,
  // then PSHUFLW with imm 0x00 broadcasts word 0 across the low 64 bits.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3266 3184
  // Broadcast the low 16-bit word of $src across the four low words of
  // $dst (PSHUFLW with shuffle immediate 0x00).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3272 3190
  // Shuffle the four dwords of $src into $dst according to the immediate
  // control byte $mode (PSHUFD).
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3278 3196
  // Bitwise XOR of two 128-bit XMM registers: $dst ^= $src.
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3284 3202
  // Move a 32-bit GPR into the low dword of an XMM register (MOVD;
  // per the ISA the upper lanes of $dst are zeroed).
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3290 3208
3291 3209
3292 3210 // Because the transitions from emitted code to the runtime
3293 3211 // monitorenter/exit helper stubs are so slow it's critical that
3294 3212 // we inline both the stack-locking fast-path and the inflated fast path.
3295 3213 //
3296 3214 // See also: cmpFastLock and cmpFastUnlock.
3297 3215 //
3298 3216 // What follows is a specialized inline transliteration of the code
3299 3217 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3300 3218 // another option would be to emit TrySlowEnter and TrySlowExit methods
3301 3219 // at startup-time. These methods would accept arguments as
3302 3220 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3303 3221 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3304 3222 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3305 3223 // In practice, however, the # of lock sites is bounded and is usually small.
3306 3224 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
3310 3228 //
3311 3229 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3312 3230 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3313 3231 // to those specialized methods. That'd give us a mostly platform-independent
3314 3232 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
3316 3234 // to park() or unpark() threads. We'd also need a few more unsafe operators
3317 3235 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3318 3236 // (b) explicit barriers or fence operations.
3319 3237 //
3320 3238 // TODO:
3321 3239 //
3322 3240 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3323 3241 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3324 3242 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3325 3243 // the lock operators would typically be faster than reifying Self.
3326 3244 //
3327 3245 // * Ideally I'd define the primitives as:
3328 3246 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3329 3247 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3330 3248 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3331 3249 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3332 3250 // Furthermore the register assignments are overconstrained, possibly resulting in
3333 3251 // sub-optimal code near the synchronization site.
3334 3252 //
3335 3253 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3336 3254 // Alternately, use a better sp-proximity test.
3337 3255 //
3338 3256 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3339 3257 // Either one is sufficient to uniquely identify a thread.
3340 3258 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3341 3259 //
3342 3260 // * Intrinsify notify() and notifyAll() for the common cases where the
3343 3261 // object is locked by the calling thread but the waitlist is empty.
3344 3262 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3345 3263 //
3346 3264 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3347 3265 // But beware of excessive branch density on AMD Opterons.
3348 3266 //
3349 3267 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3350 3268 // or failure of the fast-path. If the fast-path fails then we pass
3351 3269 // control to the slow-path, typically in C. In Fast_Lock and
3352 3270 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3353 3271 // will emit a conditional branch immediately after the node.
3354 3272 // So we have branches to branches and lots of ICC.ZF games.
3355 3273 // Instead, it might be better to have C2 pass a "FailureLabel"
3356 3274 // into Fast_Lock and Fast_Unlock. In the case of success, control
3357 3275 // will drop through the node. ICC.ZF is undefined at exit.
3358 3276 // In the case of failure, the node will branch directly to the
3359 3277 // FailureLabel
3360 3278
3361 3279
  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  //
  // Inline monitor-enter fast path.  On exit ICC.ZF == 1 indicates the lock
  // was acquired and ZF == 0 forces control through the slow path (see the
  // protocol comments at DONE_LABEL below).  The EmitSync flag bits select
  // alternate experimental strategies; the final `else` arm is the default.
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).  We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider:  LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3596 3514
3597 3515 // obj: object to unlock
3598 3516 // box: box address (displaced header location), killed. Must be EAX.
3599 3517 // rbx,: killed tmp; cannot be obj nor box.
3600 3518 //
3601 3519 // Some commentary on balanced locking:
3602 3520 //
3603 3521 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3604 3522 // Methods that don't have provably balanced locking are forced to run in the
3605 3523 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3606 3524 // The interpreter provides two properties:
3607 3525 // I1: At return-time the interpreter automatically and quietly unlocks any
3608 3526 // objects acquired the current activation (frame). Recall that the
3609 3527 // interpreter maintains an on-stack list of locks currently held by
3610 3528 // a frame.
//     I2:  If a method attempts to unlock an object that is not held by
//          the frame, the interpreter throws IMSX.
3613 3531 //
3614 3532 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3615 3533 // B() doesn't have provably balanced locking so it runs in the interpreter.
3616 3534 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3617 3535 // is still locked by A().
3618 3536 //
3619 3537 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3620 3538 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3621 3539 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3622 3540 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3623 3541
3624 3542 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3625 3543
3626 3544 Register objReg = as_Register($obj$$reg);
3627 3545 Register boxReg = as_Register($box$$reg);
3628 3546 Register tmpReg = as_Register($tmp$$reg);
3629 3547
3630 3548 guarantee (objReg != boxReg, "") ;
3631 3549 guarantee (objReg != tmpReg, "") ;
3632 3550 guarantee (boxReg != tmpReg, "") ;
3633 3551 guarantee (boxReg == as_Register(EAX_enc), "") ;
3634 3552 MacroAssembler masm(&cbuf);
3635 3553
3636 3554 if (EmitSync & 4) {
3637 3555 // Disable - inhibit all inlining. Force control through the slow-path
3638 3556 masm.cmpptr (rsp, 0) ;
3639 3557 } else
3640 3558 if (EmitSync & 8) {
3641 3559 Label DONE_LABEL ;
3642 3560 if (UseBiasedLocking) {
3643 3561 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3644 3562 }
3645 3563 // classic stack-locking code ...
3646 3564 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3647 3565 masm.testptr(tmpReg, tmpReg) ;
3648 3566 masm.jcc (Assembler::zero, DONE_LABEL) ;
3649 3567 if (os::is_MP()) { masm.lock(); }
3650 3568 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3651 3569 masm.bind(DONE_LABEL);
3652 3570 } else {
3653 3571 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3654 3572
3655 3573 // Critically, the biased locking test must have precedence over
3656 3574 // and appear before the (box->dhw == 0) recursive stack-lock test.
3657 3575 if (UseBiasedLocking && !UseOptoBiasInlining) {
3658 3576 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3659 3577 }
3660 3578
3661 3579 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3662 3580 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3663 3581 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3664 3582
3665 3583 masm.testptr(tmpReg, 0x02) ; // Inflated?
3666 3584 masm.jccb (Assembler::zero, Stacked) ;
3667 3585
3668 3586 masm.bind (Inflated) ;
3669 3587 // It's inflated.
3670 3588 // Despite our balanced locking property we still check that m->_owner == Self
3671 3589 // as java routines or native JNI code called by this thread might
3672 3590 // have released the lock.
3673 3591 // Refer to the comments in synchronizer.cpp for how we might encode extra
3674 3592 // state in _succ so we can avoid fetching EntryList|cxq.
3675 3593 //
3676 3594 // I'd like to add more cases in fast_lock() and fast_unlock() --
3677 3595 // such as recursive enter and exit -- but we have to be wary of
3678 3596 // I$ bloat, T$ effects and BP$ effects.
3679 3597 //
3680 3598 // If there's no contention try a 1-0 exit. That is, exit without
3681 3599 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3682 3600 // we detect and recover from the race that the 1-0 exit admits.
3683 3601 //
3684 3602 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3685 3603 // before it STs null into _owner, releasing the lock. Updates
3686 3604 // to data protected by the critical section must be visible before
3687 3605 // we drop the lock (and thus before any other thread could acquire
3688 3606 // the lock and observe the fields protected by the lock).
3689 3607 // IA32's memory-model is SPO, so STs are ordered with respect to
3690 3608 // each other and there's no need for an explicit barrier (fence).
3691 3609 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3692 3610
3693 3611 masm.get_thread (boxReg) ;
3694 3612 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3695 3613 // prefetchw [ebx + Offset(_owner)-2]
3696 3614 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3697 3615 }
3698 3616
3699 3617 // Note that we could employ various encoding schemes to reduce
3700 3618 // the number of loads below (currently 4) to just 2 or 3.
3701 3619 // Refer to the comments in synchronizer.cpp.
3702 3620 // In practice the chain of fetches doesn't seem to impact performance, however.
3703 3621 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3704 3622 // Attempt to reduce branch density - AMD's branch predictor.
3705 3623 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3706 3624 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3707 3625 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3708 3626 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3709 3627 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3710 3628 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3711 3629 masm.jmpb (DONE_LABEL) ;
3712 3630 } else {
3713 3631 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3714 3632 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3715 3633 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3716 3634 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3717 3635 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3718 3636 masm.jccb (Assembler::notZero, CheckSucc) ;
3719 3637 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3720 3638 masm.jmpb (DONE_LABEL) ;
3721 3639 }
3722 3640
3723 3641 // The following code fragment (EmitSync & 65536) improves the performance of
3724 3642 // contended applications and contended synchronization microbenchmarks.
3725 3643 // Unfortunately the emission of the code - even though not executed - causes regressions
3726 3644 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3727 3645 // with an equal number of never-executed NOPs results in the same regression.
3728 3646 // We leave it off by default.
3729 3647
3730 3648 if ((EmitSync & 65536) != 0) {
3731 3649 Label LSuccess, LGoSlowPath ;
3732 3650
3733 3651 masm.bind (CheckSucc) ;
3734 3652
3735 3653 // Optional pre-test ... it's safe to elide this
3736 3654 if ((EmitSync & 16) == 0) {
3737 3655 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3738 3656 masm.jccb (Assembler::zero, LGoSlowPath) ;
3739 3657 }
3740 3658
3741 3659 // We have a classic Dekker-style idiom:
3742 3660 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3743 3661 // There are a number of ways to implement the barrier:
3744 3662 // (1) lock:andl &m->_owner, 0
3745 3663 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3746 3664 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3747 3665 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3748 3666 // (2) If supported, an explicit MFENCE is appealing.
3749 3667 // In older IA32 processors MFENCE is slower than lock:add or xchg
3750 3668 // particularly if the write-buffer is full as might be the case if
3751 3669 // if stores closely precede the fence or fence-equivalent instruction.
3752 3670 // In more modern implementations MFENCE appears faster, however.
3753 3671 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3754 3672 // The $lines underlying the top-of-stack should be in M-state.
3755 3673 // The locked add instruction is serializing, of course.
3756 3674 // (4) Use xchg, which is serializing
3757 3675 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3758 3676 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3759 3677 // The integer condition codes will tell us if succ was 0.
3760 3678 // Since _succ and _owner should reside in the same $line and
3761 3679 // we just stored into _owner, it's likely that the $line
3762 3680 // remains in M-state for the lock:orl.
3763 3681 //
3764 3682 // We currently use (3), although it's likely that switching to (2)
3765 3683 // is correct for the future.
3766 3684
3767 3685 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3768 3686 if (os::is_MP()) {
3769 3687 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3770 3688 masm.mfence();
3771 3689 } else {
3772 3690 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3773 3691 }
3774 3692 }
3775 3693 // Ratify _succ remains non-null
3776 3694 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3777 3695 masm.jccb (Assembler::notZero, LSuccess) ;
3778 3696
3779 3697 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3780 3698 if (os::is_MP()) { masm.lock(); }
3781 3699 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3782 3700 masm.jccb (Assembler::notEqual, LSuccess) ;
3783 3701 // Since we're low on registers we installed rsp as a placeholding in _owner.
3784 3702 // Now install Self over rsp. This is safe as we're transitioning from
3785 3703 // non-null to non-null
3786 3704 masm.get_thread (boxReg) ;
3787 3705 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3788 3706 // Intentional fall-through into LGoSlowPath ...
3789 3707
3790 3708 masm.bind (LGoSlowPath) ;
3791 3709 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3792 3710 masm.jmpb (DONE_LABEL) ;
3793 3711
3794 3712 masm.bind (LSuccess) ;
3795 3713 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3796 3714 masm.jmpb (DONE_LABEL) ;
3797 3715 }
3798 3716
3799 3717 masm.bind (Stacked) ;
3800 3718 // It's not inflated and it's not recursively stack-locked and it's not biased.
3801 3719 // It must be stack-locked.
3802 3720 // Try to reset the header to displaced header.
3803 3721 // The "box" value on the stack is stable, so we can reload
3804 3722 // and be assured we observe the same value as above.
3805 3723 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3806 3724 if (os::is_MP()) { masm.lock(); }
3807 3725 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3808 3726 // Intentional fall-through into DONE_LABEL
3809 3727
3810 3728
3811 3729 // DONE_LABEL is a hot target - we'd really like to place it at the
3812 3730 // start of cache line by padding with NOPs.
3813 3731 // See the AMD and Intel software optimization manuals for the
3814 3732 // most efficient "long" NOP encodings.
3815 3733 // Unfortunately none of our alignment mechanisms suffice.
3816 3734 if ((EmitSync & 65536) == 0) {
3817 3735 masm.bind (CheckSucc) ;
3818 3736 }
3819 3737 masm.bind(DONE_LABEL);
3820 3738
3821 3739 // Avoid branch to branch on AMD processors
3822 3740 if (EmitSync & 32768) { masm.nop() ; }
3823 3741 }
3824 3742 %}
3825 3743
3826 3744
3827 3745 enc_class enc_pop_rdx() %{
3828 3746 emit_opcode(cbuf,0x5A);
3829 3747 %}
3830 3748
3831 3749 enc_class enc_rethrow() %{
3832 3750 cbuf.set_insts_mark();
3833 3751 emit_opcode(cbuf, 0xE9); // jmp entry
3834 3752 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3835 3753 runtime_call_Relocation::spec(), RELOC_IMM32 );
3836 3754 %}
3837 3755
3838 3756
3839 3757 // Convert a double to an int. Java semantics require we do complex
3840 3758 // manglelations in the corner cases. So we set the rounding mode to
3841 3759 // 'zero', store the darned double down as an int, and reset the
3842 3760 // rounding mode to 'nearest'. The hardware throws an exception which
3843 3761 // patches up the correct value directly to the stack.
3844 3762 enc_class D2I_encoding( regD src ) %{
3845 3763 // Flip to round-to-zero mode. We attempted to allow invalid-op
3846 3764 // exceptions here, so that a NAN or other corner-case value will
3847 3765 // thrown an exception (but normal values get converted at full speed).
3848 3766 // However, I2C adapters and other float-stack manglers leave pending
3849 3767 // invalid-op exceptions hanging. We would have to clear them before
3850 3768 // enabling them and that is more expensive than just testing for the
3851 3769 // invalid value Intel stores down in the corner cases.
3852 3770 emit_opcode(cbuf,0xD9); // FLDCW trunc
3853 3771 emit_opcode(cbuf,0x2D);
3854 3772 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3855 3773 // Allocate a word
3856 3774 emit_opcode(cbuf,0x83); // SUB ESP,4
3857 3775 emit_opcode(cbuf,0xEC);
3858 3776 emit_d8(cbuf,0x04);
3859 3777 // Encoding assumes a double has been pushed into FPR0.
3860 3778 // Store down the double as an int, popping the FPU stack
3861 3779 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3862 3780 emit_opcode(cbuf,0x1C);
3863 3781 emit_d8(cbuf,0x24);
3864 3782 // Restore the rounding mode; mask the exception
3865 3783 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3866 3784 emit_opcode(cbuf,0x2D);
3867 3785 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3868 3786 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3869 3787 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3870 3788
3871 3789 // Load the converted int; adjust CPU stack
3872 3790 emit_opcode(cbuf,0x58); // POP EAX
3873 3791 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3874 3792 emit_d32 (cbuf,0x80000000); // 0x80000000
3875 3793 emit_opcode(cbuf,0x75); // JNE around_slow_call
3876 3794 emit_d8 (cbuf,0x07); // Size of slow_call
3877 3795 // Push src onto stack slow-path
3878 3796 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3879 3797 emit_d8 (cbuf,0xC0-1+$src$$reg );
3880 3798 // CALL directly to the runtime
3881 3799 cbuf.set_insts_mark();
3882 3800 emit_opcode(cbuf,0xE8); // Call into runtime
3883 3801 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3884 3802 // Carry on here...
3885 3803 %}
3886 3804
3887 3805 enc_class D2L_encoding( regD src ) %{
3888 3806 emit_opcode(cbuf,0xD9); // FLDCW trunc
3889 3807 emit_opcode(cbuf,0x2D);
3890 3808 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3891 3809 // Allocate a word
3892 3810 emit_opcode(cbuf,0x83); // SUB ESP,8
3893 3811 emit_opcode(cbuf,0xEC);
3894 3812 emit_d8(cbuf,0x08);
3895 3813 // Encoding assumes a double has been pushed into FPR0.
3896 3814 // Store down the double as a long, popping the FPU stack
3897 3815 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3898 3816 emit_opcode(cbuf,0x3C);
3899 3817 emit_d8(cbuf,0x24);
3900 3818 // Restore the rounding mode; mask the exception
3901 3819 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3902 3820 emit_opcode(cbuf,0x2D);
3903 3821 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3904 3822 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3905 3823 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3906 3824
3907 3825 // Load the converted int; adjust CPU stack
3908 3826 emit_opcode(cbuf,0x58); // POP EAX
3909 3827 emit_opcode(cbuf,0x5A); // POP EDX
3910 3828 emit_opcode(cbuf,0x81); // CMP EDX,imm
3911 3829 emit_d8 (cbuf,0xFA); // rdx
3912 3830 emit_d32 (cbuf,0x80000000); // 0x80000000
3913 3831 emit_opcode(cbuf,0x75); // JNE around_slow_call
3914 3832 emit_d8 (cbuf,0x07+4); // Size of slow_call
3915 3833 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3916 3834 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3917 3835 emit_opcode(cbuf,0x75); // JNE around_slow_call
3918 3836 emit_d8 (cbuf,0x07); // Size of slow_call
3919 3837 // Push src onto stack slow-path
3920 3838 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3921 3839 emit_d8 (cbuf,0xC0-1+$src$$reg );
3922 3840 // CALL directly to the runtime
3923 3841 cbuf.set_insts_mark();
3924 3842 emit_opcode(cbuf,0xE8); // Call into runtime
3925 3843 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3926 3844 // Carry on here...
3927 3845 %}
3928 3846
3929 3847 enc_class X2L_encoding( regX src ) %{
3930 3848 // Allocate a word
3931 3849 emit_opcode(cbuf,0x83); // SUB ESP,8
3932 3850 emit_opcode(cbuf,0xEC);
3933 3851 emit_d8(cbuf,0x08);
3934 3852
3935 3853 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3936 3854 emit_opcode (cbuf, 0x0F );
3937 3855 emit_opcode (cbuf, 0x11 );
3938 3856 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3857
3940 3858 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3941 3859 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3860
3943 3861 emit_opcode(cbuf,0xD9); // FLDCW trunc
3944 3862 emit_opcode(cbuf,0x2D);
3945 3863 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3946 3864
3947 3865 // Encoding assumes a double has been pushed into FPR0.
3948 3866 // Store down the double as a long, popping the FPU stack
3949 3867 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3950 3868 emit_opcode(cbuf,0x3C);
3951 3869 emit_d8(cbuf,0x24);
3952 3870
3953 3871 // Restore the rounding mode; mask the exception
3954 3872 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3955 3873 emit_opcode(cbuf,0x2D);
3956 3874 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3957 3875 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3958 3876 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3959 3877
3960 3878 // Load the converted int; adjust CPU stack
3961 3879 emit_opcode(cbuf,0x58); // POP EAX
3962 3880
3963 3881 emit_opcode(cbuf,0x5A); // POP EDX
3964 3882
3965 3883 emit_opcode(cbuf,0x81); // CMP EDX,imm
3966 3884 emit_d8 (cbuf,0xFA); // rdx
3967 3885 emit_d32 (cbuf,0x80000000);// 0x80000000
3968 3886
3969 3887 emit_opcode(cbuf,0x75); // JNE around_slow_call
3970 3888 emit_d8 (cbuf,0x13+4); // Size of slow_call
3971 3889
3972 3890 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3973 3891 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3974 3892
3975 3893 emit_opcode(cbuf,0x75); // JNE around_slow_call
3976 3894 emit_d8 (cbuf,0x13); // Size of slow_call
3977 3895
3978 3896 // Allocate a word
3979 3897 emit_opcode(cbuf,0x83); // SUB ESP,4
3980 3898 emit_opcode(cbuf,0xEC);
3981 3899 emit_d8(cbuf,0x04);
3982 3900
3983 3901 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3984 3902 emit_opcode (cbuf, 0x0F );
3985 3903 emit_opcode (cbuf, 0x11 );
3986 3904 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3987 3905
3988 3906 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3989 3907 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3990 3908
3991 3909 emit_opcode(cbuf,0x83); // ADD ESP,4
3992 3910 emit_opcode(cbuf,0xC4);
3993 3911 emit_d8(cbuf,0x04);
3994 3912
3995 3913 // CALL directly to the runtime
3996 3914 cbuf.set_insts_mark();
3997 3915 emit_opcode(cbuf,0xE8); // Call into runtime
3998 3916 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3999 3917 // Carry on here...
4000 3918 %}
4001 3919
4002 3920 enc_class XD2L_encoding( regXD src ) %{
4003 3921 // Allocate a word
4004 3922 emit_opcode(cbuf,0x83); // SUB ESP,8
4005 3923 emit_opcode(cbuf,0xEC);
4006 3924 emit_d8(cbuf,0x08);
4007 3925
4008 3926 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4009 3927 emit_opcode (cbuf, 0x0F );
4010 3928 emit_opcode (cbuf, 0x11 );
4011 3929 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4012 3930
4013 3931 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4014 3932 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4015 3933
4016 3934 emit_opcode(cbuf,0xD9); // FLDCW trunc
4017 3935 emit_opcode(cbuf,0x2D);
4018 3936 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4019 3937
4020 3938 // Encoding assumes a double has been pushed into FPR0.
4021 3939 // Store down the double as a long, popping the FPU stack
4022 3940 emit_opcode(cbuf,0xDF); // FISTP [ESP]
4023 3941 emit_opcode(cbuf,0x3C);
4024 3942 emit_d8(cbuf,0x24);
4025 3943
4026 3944 // Restore the rounding mode; mask the exception
4027 3945 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
4028 3946 emit_opcode(cbuf,0x2D);
4029 3947 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4030 3948 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4031 3949 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4032 3950
4033 3951 // Load the converted int; adjust CPU stack
4034 3952 emit_opcode(cbuf,0x58); // POP EAX
4035 3953
4036 3954 emit_opcode(cbuf,0x5A); // POP EDX
4037 3955
4038 3956 emit_opcode(cbuf,0x81); // CMP EDX,imm
4039 3957 emit_d8 (cbuf,0xFA); // rdx
4040 3958 emit_d32 (cbuf,0x80000000); // 0x80000000
4041 3959
4042 3960 emit_opcode(cbuf,0x75); // JNE around_slow_call
4043 3961 emit_d8 (cbuf,0x13+4); // Size of slow_call
4044 3962
4045 3963 emit_opcode(cbuf,0x85); // TEST EAX,EAX
4046 3964 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
4047 3965
4048 3966 emit_opcode(cbuf,0x75); // JNE around_slow_call
4049 3967 emit_d8 (cbuf,0x13); // Size of slow_call
4050 3968
4051 3969 // Push src onto stack slow-path
4052 3970 // Allocate a word
4053 3971 emit_opcode(cbuf,0x83); // SUB ESP,8
4054 3972 emit_opcode(cbuf,0xEC);
4055 3973 emit_d8(cbuf,0x08);
4056 3974
4057 3975 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4058 3976 emit_opcode (cbuf, 0x0F );
4059 3977 emit_opcode (cbuf, 0x11 );
4060 3978 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4061 3979
4062 3980 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4063 3981 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4064 3982
4065 3983 emit_opcode(cbuf,0x83); // ADD ESP,8
4066 3984 emit_opcode(cbuf,0xC4);
4067 3985 emit_d8(cbuf,0x08);
4068 3986
4069 3987 // CALL directly to the runtime
4070 3988 cbuf.set_insts_mark();
4071 3989 emit_opcode(cbuf,0xE8); // Call into runtime
4072 3990 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4073 3991 // Carry on here...
4074 3992 %}
4075 3993
4076 3994 enc_class D2X_encoding( regX dst, regD src ) %{
4077 3995 // Allocate a word
4078 3996 emit_opcode(cbuf,0x83); // SUB ESP,4
4079 3997 emit_opcode(cbuf,0xEC);
4080 3998 emit_d8(cbuf,0x04);
4081 3999 int pop = 0x02;
4082 4000 if ($src$$reg != FPR1L_enc) {
4083 4001 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4084 4002 emit_d8( cbuf, 0xC0-1+$src$$reg );
4085 4003 pop = 0x03;
4086 4004 }
4087 4005 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4088 4006
4089 4007 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4090 4008 emit_opcode (cbuf, 0x0F );
4091 4009 emit_opcode (cbuf, 0x10 );
4092 4010 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4093 4011
4094 4012 emit_opcode(cbuf,0x83); // ADD ESP,4
4095 4013 emit_opcode(cbuf,0xC4);
4096 4014 emit_d8(cbuf,0x04);
4097 4015 // Carry on here...
4098 4016 %}
4099 4017
4100 4018 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4101 4019 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4102 4020
4103 4021 // Compare the result to see if we need to go to the slow path
4104 4022 emit_opcode(cbuf,0x81); // CMP dst,imm
4105 4023 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4106 4024 emit_d32 (cbuf,0x80000000); // 0x80000000
4107 4025
4108 4026 emit_opcode(cbuf,0x75); // JNE around_slow_call
4109 4027 emit_d8 (cbuf,0x13); // Size of slow_call
4110 4028 // Store xmm to a temp memory
4111 4029 // location and push it onto stack.
4112 4030
4113 4031 emit_opcode(cbuf,0x83); // SUB ESP,4
4114 4032 emit_opcode(cbuf,0xEC);
4115 4033 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4116 4034
4117 4035 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
4118 4036 emit_opcode (cbuf, 0x0F );
4119 4037 emit_opcode (cbuf, 0x11 );
4120 4038 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4121 4039
4122 4040 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4123 4041 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4124 4042
4125 4043 emit_opcode(cbuf,0x83); // ADD ESP,4
4126 4044 emit_opcode(cbuf,0xC4);
4127 4045 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4128 4046
4129 4047 // CALL directly to the runtime
4130 4048 cbuf.set_insts_mark();
4131 4049 emit_opcode(cbuf,0xE8); // Call into runtime
4132 4050 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4133 4051
4134 4052 // Carry on here...
4135 4053 %}
4136 4054
4137 4055 enc_class X2D_encoding( regD dst, regX src ) %{
4138 4056 // Allocate a word
4139 4057 emit_opcode(cbuf,0x83); // SUB ESP,4
4140 4058 emit_opcode(cbuf,0xEC);
4141 4059 emit_d8(cbuf,0x04);
4142 4060
4143 4061 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4144 4062 emit_opcode (cbuf, 0x0F );
4145 4063 emit_opcode (cbuf, 0x11 );
4146 4064 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4147 4065
4148 4066 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4149 4067 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4150 4068
4151 4069 emit_opcode(cbuf,0x83); // ADD ESP,4
4152 4070 emit_opcode(cbuf,0xC4);
4153 4071 emit_d8(cbuf,0x04);
4154 4072
4155 4073 // Carry on here...
4156 4074 %}
4157 4075
4158 4076 enc_class AbsXF_encoding(regX dst) %{
4159 4077 address signmask_address=(address)float_signmask_pool;
4160 4078 // andpd:\tANDPS $dst,[signconst]
4161 4079 emit_opcode(cbuf, 0x0F);
4162 4080 emit_opcode(cbuf, 0x54);
4163 4081 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4164 4082 emit_d32(cbuf, (int)signmask_address);
4165 4083 %}
4166 4084
4167 4085 enc_class AbsXD_encoding(regXD dst) %{
4168 4086 address signmask_address=(address)double_signmask_pool;
4169 4087 // andpd:\tANDPD $dst,[signconst]
4170 4088 emit_opcode(cbuf, 0x66);
4171 4089 emit_opcode(cbuf, 0x0F);
4172 4090 emit_opcode(cbuf, 0x54);
4173 4091 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4174 4092 emit_d32(cbuf, (int)signmask_address);
4175 4093 %}
4176 4094
4177 4095 enc_class NegXF_encoding(regX dst) %{
4178 4096 address signmask_address=(address)float_signflip_pool;
4179 4097 // andpd:\tXORPS $dst,[signconst]
4180 4098 emit_opcode(cbuf, 0x0F);
4181 4099 emit_opcode(cbuf, 0x57);
4182 4100 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4183 4101 emit_d32(cbuf, (int)signmask_address);
4184 4102 %}
4185 4103
4186 4104 enc_class NegXD_encoding(regXD dst) %{
4187 4105 address signmask_address=(address)double_signflip_pool;
4188 4106 // andpd:\tXORPD $dst,[signconst]
4189 4107 emit_opcode(cbuf, 0x66);
4190 4108 emit_opcode(cbuf, 0x0F);
4191 4109 emit_opcode(cbuf, 0x57);
4192 4110 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4193 4111 emit_d32(cbuf, (int)signmask_address);
4194 4112 %}
4195 4113
4196 4114 enc_class FMul_ST_reg( eRegF src1 ) %{
4197 4115 // Operand was loaded from memory into fp ST (stack top)
4198 4116 // FMUL ST,$src /* D8 C8+i */
4199 4117 emit_opcode(cbuf, 0xD8);
4200 4118 emit_opcode(cbuf, 0xC8 + $src1$$reg);
4201 4119 %}
4202 4120
4203 4121 enc_class FAdd_ST_reg( eRegF src2 ) %{
4204 4122 // FADDP ST,src2 /* D8 C0+i */
4205 4123 emit_opcode(cbuf, 0xD8);
4206 4124 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4207 4125 //could use FADDP src2,fpST /* DE C0+i */
4208 4126 %}
4209 4127
4210 4128 enc_class FAddP_reg_ST( eRegF src2 ) %{
4211 4129 // FADDP src2,ST /* DE C0+i */
4212 4130 emit_opcode(cbuf, 0xDE);
4213 4131 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4214 4132 %}
4215 4133
4216 4134 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4217 4135 // Operand has been loaded into fp ST (stack top)
4218 4136 // FSUB ST,$src1
4219 4137 emit_opcode(cbuf, 0xD8);
4220 4138 emit_opcode(cbuf, 0xE0 + $src1$$reg);
4221 4139
4222 4140 // FDIV
4223 4141 emit_opcode(cbuf, 0xD8);
4224 4142 emit_opcode(cbuf, 0xF0 + $src2$$reg);
4225 4143 %}
4226 4144
4227 4145 enc_class MulFAddF (eRegF src1, eRegF src2) %{
4228 4146 // Operand was loaded from memory into fp ST (stack top)
4229 4147 // FADD ST,$src /* D8 C0+i */
4230 4148 emit_opcode(cbuf, 0xD8);
4231 4149 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4232 4150
4233 4151 // FMUL ST,src2 /* D8 C*+i */
4234 4152 emit_opcode(cbuf, 0xD8);
4235 4153 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4236 4154 %}
4237 4155
4238 4156
4239 4157 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4240 4158 // Operand was loaded from memory into fp ST (stack top)
4241 4159 // FADD ST,$src /* D8 C0+i */
4242 4160 emit_opcode(cbuf, 0xD8);
4243 4161 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4244 4162
4245 4163 // FMULP src2,ST /* DE C8+i */
4246 4164 emit_opcode(cbuf, 0xDE);
4247 4165 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4248 4166 %}
4249 4167
4250 4168 // Atomically load the volatile long
4251 4169 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4252 4170 emit_opcode(cbuf,0xDF);
4253 4171 int rm_byte_opcode = 0x05;
4254 4172 int base = $mem$$base;
4255 4173 int index = $mem$$index;
4256 4174 int scale = $mem$$scale;
4257 4175 int displace = $mem$$disp;
4258 4176 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4259 4177 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4260 4178 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
4261 4179 %}
4262 4180
4263 4181 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4264 4182 { // Atomic long load
4265 4183 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4266 4184 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4267 4185 emit_opcode(cbuf,0x0F);
4268 4186 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4269 4187 int base = $mem$$base;
4270 4188 int index = $mem$$index;
4271 4189 int scale = $mem$$scale;
4272 4190 int displace = $mem$$disp;
4273 4191 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4192 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4193 }
4276 4194 { // MOVSD $dst,$tmp ! atomic long store
4277 4195 emit_opcode(cbuf,0xF2);
4278 4196 emit_opcode(cbuf,0x0F);
4279 4197 emit_opcode(cbuf,0x11);
4280 4198 int base = $dst$$base;
4281 4199 int index = $dst$$index;
4282 4200 int scale = $dst$$scale;
4283 4201 int displace = $dst$$disp;
4284 4202 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4285 4203 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4286 4204 }
4287 4205 %}
4288 4206
4289 4207 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4290 4208 { // Atomic long load
4291 4209 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4292 4210 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4293 4211 emit_opcode(cbuf,0x0F);
4294 4212 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4295 4213 int base = $mem$$base;
4296 4214 int index = $mem$$index;
4297 4215 int scale = $mem$$scale;
4298 4216 int displace = $mem$$disp;
4299 4217 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4300 4218 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4301 4219 }
4302 4220 { // MOVD $dst.lo,$tmp
4303 4221 emit_opcode(cbuf,0x66);
4304 4222 emit_opcode(cbuf,0x0F);
4305 4223 emit_opcode(cbuf,0x7E);
4306 4224 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4307 4225 }
4308 4226 { // PSRLQ $tmp,32
4309 4227 emit_opcode(cbuf,0x66);
4310 4228 emit_opcode(cbuf,0x0F);
4311 4229 emit_opcode(cbuf,0x73);
4312 4230 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4313 4231 emit_d8(cbuf, 0x20);
4314 4232 }
4315 4233 { // MOVD $dst.hi,$tmp
4316 4234 emit_opcode(cbuf,0x66);
4317 4235 emit_opcode(cbuf,0x0F);
4318 4236 emit_opcode(cbuf,0x7E);
4319 4237 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4320 4238 }
4321 4239 %}
4322 4240
4323 4241 // Volatile Store Long. Must be atomic, so move it into
4324 4242 // the FP TOS and then do a 64-bit FIST. Has to probe the
4325 4243 // target address before the store (for null-ptr checks)
4326 4244 // so the memory operand is used twice in the encoding.
4327 4245 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4328 4246 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
4329 4247 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
4330 4248 emit_opcode(cbuf,0xDF);
4331 4249 int rm_byte_opcode = 0x07;
4332 4250 int base = $mem$$base;
4333 4251 int index = $mem$$index;
4334 4252 int scale = $mem$$scale;
4335 4253 int displace = $mem$$disp;
4336 4254 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4337 4255 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4338 4256 %}
4339 4257
4340 4258 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4341 4259 { // Atomic long load
4342 4260 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4343 4261 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4344 4262 emit_opcode(cbuf,0x0F);
4345 4263 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4346 4264 int base = $src$$base;
4347 4265 int index = $src$$index;
4348 4266 int scale = $src$$scale;
4349 4267 int displace = $src$$disp;
4350 4268 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4351 4269 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4352 4270 }
4353 4271 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4354 4272 { // MOVSD $mem,$tmp ! atomic long store
4355 4273 emit_opcode(cbuf,0xF2);
4356 4274 emit_opcode(cbuf,0x0F);
4357 4275 emit_opcode(cbuf,0x11);
4358 4276 int base = $mem$$base;
4359 4277 int index = $mem$$index;
4360 4278 int scale = $mem$$scale;
4361 4279 int displace = $mem$$disp;
4362 4280 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4363 4281 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4364 4282 }
4365 4283 %}
4366 4284
4367 4285 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4368 4286 { // MOVD $tmp,$src.lo
4369 4287 emit_opcode(cbuf,0x66);
4370 4288 emit_opcode(cbuf,0x0F);
4371 4289 emit_opcode(cbuf,0x6E);
4372 4290 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4373 4291 }
4374 4292 { // MOVD $tmp2,$src.hi
4375 4293 emit_opcode(cbuf,0x66);
4376 4294 emit_opcode(cbuf,0x0F);
4377 4295 emit_opcode(cbuf,0x6E);
4378 4296 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4379 4297 }
4380 4298 { // PUNPCKLDQ $tmp,$tmp2
4381 4299 emit_opcode(cbuf,0x66);
4382 4300 emit_opcode(cbuf,0x0F);
4383 4301 emit_opcode(cbuf,0x62);
4384 4302 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4385 4303 }
4386 4304 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4387 4305 { // MOVSD $mem,$tmp ! atomic long store
4388 4306 emit_opcode(cbuf,0xF2);
4389 4307 emit_opcode(cbuf,0x0F);
4390 4308 emit_opcode(cbuf,0x11);
4391 4309 int base = $mem$$base;
4392 4310 int index = $mem$$index;
4393 4311 int scale = $mem$$scale;
4394 4312 int displace = $mem$$disp;
4395 4313 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4396 4314 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4397 4315 }
4398 4316 %}
4399 4317
4400 4318 // Safepoint Poll. This polls the safepoint page, and causes an
4401 4319 // exception if it is not readable. Unfortunately, it kills the condition code
4402 4320 // in the process
4403 4321 // We current use TESTL [spp],EDI
4404 4322 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4405 4323
4406 4324 enc_class Safepoint_Poll() %{
4407 4325 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
4408 4326 emit_opcode(cbuf,0x85);
4409 4327 emit_rm (cbuf, 0x0, 0x7, 0x5);
4410 4328 emit_d32(cbuf, (intptr_t)os::get_polling_page());
4411 4329 %}
4412 4330 %}
4413 4331
4414 4332
4415 4333 //----------FRAME--------------------------------------------------------------
4416 4334 // Definition of frame structure and management information.
4417 4335 //
4418 4336 // S T A C K L A Y O U T Allocators stack-slot number
4419 4337 // | (to get allocators register number
4420 4338 // G Owned by | | v add OptoReg::stack0())
4421 4339 // r CALLER | |
4422 4340 // o | +--------+ pad to even-align allocators stack-slot
4423 4341 // w V | pad0 | numbers; owned by CALLER
4424 4342 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4425 4343 // h ^ | in | 5
4426 4344 // | | args | 4 Holes in incoming args owned by SELF
4427 4345 // | | | | 3
4428 4346 // | | +--------+
4429 4347 // V | | old out| Empty on Intel, window on Sparc
4430 4348 // | old |preserve| Must be even aligned.
4431 4349 // | SP-+--------+----> Matcher::_old_SP, even aligned
4432 4350 // | | in | 3 area for Intel ret address
4433 4351 // Owned by |preserve| Empty on Sparc.
4434 4352 // SELF +--------+
4435 4353 // | | pad2 | 2 pad to align old SP
4436 4354 // | +--------+ 1
4437 4355 // | | locks | 0
4438 4356 // | +--------+----> OptoReg::stack0(), even aligned
4439 4357 // | | pad1 | 11 pad to align new SP
4440 4358 // | +--------+
4441 4359 // | | | 10
4442 4360 // | | spills | 9 spills
4443 4361 // V | | 8 (pad0 slot for callee)
4444 4362 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4445 4363 // ^ | out | 7
4446 4364 // | | args | 6 Holes in outgoing args owned by CALLEE
4447 4365 // Owned by +--------+
4448 4366 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4449 4367 // | new |preserve| Must be even-aligned.
4450 4368 // | SP-+--------+----> Matcher::_new_SP, even aligned
4451 4369 // | | |
4452 4370 //
4453 4371 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4454 4372 // known from SELF's arguments and the Java calling convention.
4455 4373 // Region 6-7 is determined per call site.
4456 4374 // Note 2: If the calling convention leaves holes in the incoming argument
4457 4375 // area, those holes are owned by SELF. Holes in the outgoing area
4458 4376 // are owned by the CALLEE. Holes should not be necessary in the
4459 4377 // incoming area, as the Java calling convention is completely under
4460 4378 // the control of the AD file. Doubles can be sorted and packed to
4461 4379 // avoid holes. Holes in the outgoing arguments may be necessary for
4462 4380 // varargs C calling conventions.
4463 4381 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4464 4382 // even aligned with pad0 as needed.
4465 4383 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4466 4384 // region 6-11 is even aligned; it may be padded out more so that
4467 4385 // the region from SP to FP meets the minimum stack alignment.
4468 4386
4469 4387 frame %{
4470 4388 // What direction does stack grow in (assumed to be same for C & Java)
4471 4389 stack_direction(TOWARDS_LOW);
4472 4390
4473 4391 // These three registers define part of the calling convention
4474 4392 // between compiled code and the interpreter.
4475 4393 inline_cache_reg(EAX); // Inline Cache Register
4476 4394 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4477 4395
4478 4396 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4479 4397 cisc_spilling_operand_name(indOffset32);
4480 4398
4481 4399 // Number of stack slots consumed by locking an object
4482 4400 sync_stack_slots(1);
4483 4401
4484 4402 // Compiled code's Frame Pointer
4485 4403 frame_pointer(ESP);
4486 4404 // Interpreter stores its frame pointer in a register which is
4487 4405 // stored to the stack by I2CAdaptors.
4488 4406 // I2CAdaptors convert from interpreted java to compiled java.
4489 4407 interpreter_frame_pointer(EBP);
4490 4408
4491 4409 // Stack alignment requirement
4492 4410 // Alignment size in bytes (128-bit -> 16 bytes)
4493 4411 stack_alignment(StackAlignmentInBytes);
4494 4412
4495 4413 // Number of stack slots between incoming argument block and the start of
4496 4414 // a new frame. The PROLOG must add this many slots to the stack. The
4497 4415 // EPILOG must remove this many slots. Intel needs one slot for
4498 4416 // return address and one for rbp, (must save rbp)
4499 4417 in_preserve_stack_slots(2+VerifyStackAtCalls);
4500 4418
4501 4419 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4502 4420 // for calls to C. Supports the var-args backing area for register parms.
4503 4421 varargs_C_out_slots_killed(0);
4504 4422
4505 4423 // The after-PROLOG location of the return address. Location of
4506 4424 // return address specifies a type (REG or STACK) and a number
4507 4425 // representing the register number (i.e. - use a register name) or
4508 4426 // stack slot.
4509 4427 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4510 4428 // Otherwise, it is above the locks and verification slot and alignment word
4511 4429 return_addr(STACK - 1 +
4512 4430 round_to(1+VerifyStackAtCalls+
4513 4431 Compile::current()->fixed_slots(),
4514 4432 (StackAlignmentInBytes/wordSize)));
4515 4433
4516 4434 // Body of function which returns an integer array locating
4517 4435 // arguments either in registers or in stack slots. Passed an array
4518 4436 // of ideal registers called "sig" and a "length" count. Stack-slot
4519 4437 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4520 4438 // arguments for a CALLEE. Incoming stack arguments are
4521 4439 // automatically biased by the preserve_stack_slots field above.
4522 4440 calling_convention %{
4523 4441 // No difference between ingoing/outgoing just pass false
4524 4442 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4525 4443 %}
4526 4444
4527 4445
4528 4446 // Body of function which returns an integer array locating
4529 4447 // arguments either in registers or in stack slots. Passed an array
4530 4448 // of ideal registers called "sig" and a "length" count. Stack-slot
4531 4449 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4532 4450 // arguments for a CALLEE. Incoming stack arguments are
4533 4451 // automatically biased by the preserve_stack_slots field above.
4534 4452 c_calling_convention %{
4535 4453 // This is obviously always outgoing
4536 4454 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4537 4455 %}
4538 4456
4539 4457 // Location of C & interpreter return values
4540 4458 c_return_value %{
4541 4459 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4542 4460 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4543 4461 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4544 4462
4545 4463 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4546 4464 // that C functions return float and double results in XMM0.
4547 4465 if( ideal_reg == Op_RegD && UseSSE>=2 )
4548 4466 return OptoRegPair(XMM0b_num,XMM0a_num);
4549 4467 if( ideal_reg == Op_RegF && UseSSE>=2 )
4550 4468 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4551 4469
4552 4470 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4553 4471 %}
4554 4472
4555 4473 // Location of return values
4556 4474 return_value %{
4557 4475 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4558 4476 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4559 4477 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4560 4478 if( ideal_reg == Op_RegD && UseSSE>=2 )
4561 4479 return OptoRegPair(XMM0b_num,XMM0a_num);
4562 4480 if( ideal_reg == Op_RegF && UseSSE>=1 )
4563 4481 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4564 4482 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4565 4483 %}
4566 4484
4567 4485 %}
4568 4486
4569 4487 //----------ATTRIBUTES---------------------------------------------------------
4570 4488 //----------Operand Attributes-------------------------------------------------
4571 4489 op_attrib op_cost(0); // Required cost attribute
4572 4490
4573 4491 //----------Instruction Attributes---------------------------------------------
4574 4492 ins_attrib ins_cost(100); // Required cost attribute
4575 4493 ins_attrib ins_size(8); // Required size attribute (in bits)
4576 4494 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4577 4495 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4578 4496 // non-matching short branch variant of some
4579 4497 // long branch?
4580 4498 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4581 4499 // specifies the alignment that some part of the instruction (not
4582 4500 // necessarily the start) requires. If > 1, a compute_padding()
4583 4501 // function must be provided for the instruction
4584 4502
4585 4503 //----------OPERANDS-----------------------------------------------------------
4586 4504 // Operand definitions must precede instruction definitions for correct parsing
4587 4505 // in the ADLC because operands constitute user defined types which are used in
4588 4506 // instruction definitions.
4589 4507
4590 4508 //----------Simple Operands----------------------------------------------------
4591 4509 // Immediate Operands
4592 4510 // Integer Immediate
4593 4511 operand immI() %{
4594 4512 match(ConI);
4595 4513
4596 4514 op_cost(10);
4597 4515 format %{ %}
4598 4516 interface(CONST_INTER);
4599 4517 %}
4600 4518
4601 4519 // Constant for test vs zero
4602 4520 operand immI0() %{
4603 4521 predicate(n->get_int() == 0);
4604 4522 match(ConI);
4605 4523
4606 4524 op_cost(0);
4607 4525 format %{ %}
4608 4526 interface(CONST_INTER);
4609 4527 %}
4610 4528
4611 4529 // Constant for increment
4612 4530 operand immI1() %{
4613 4531 predicate(n->get_int() == 1);
4614 4532 match(ConI);
4615 4533
4616 4534 op_cost(0);
4617 4535 format %{ %}
4618 4536 interface(CONST_INTER);
4619 4537 %}
4620 4538
4621 4539 // Constant for decrement
4622 4540 operand immI_M1() %{
4623 4541 predicate(n->get_int() == -1);
4624 4542 match(ConI);
4625 4543
4626 4544 op_cost(0);
4627 4545 format %{ %}
4628 4546 interface(CONST_INTER);
4629 4547 %}
4630 4548
4631 4549 // Valid scale values for addressing modes
4632 4550 operand immI2() %{
4633 4551 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4634 4552 match(ConI);
4635 4553
4636 4554 format %{ %}
4637 4555 interface(CONST_INTER);
4638 4556 %}
4639 4557
4640 4558 operand immI8() %{
4641 4559 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4642 4560 match(ConI);
4643 4561
4644 4562 op_cost(5);
4645 4563 format %{ %}
4646 4564 interface(CONST_INTER);
4647 4565 %}
4648 4566
4649 4567 operand immI16() %{
4650 4568 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4651 4569 match(ConI);
4652 4570
4653 4571 op_cost(10);
4654 4572 format %{ %}
4655 4573 interface(CONST_INTER);
4656 4574 %}
4657 4575
4658 4576 // Constant for long shifts
4659 4577 operand immI_32() %{
4660 4578 predicate( n->get_int() == 32 );
4661 4579 match(ConI);
4662 4580
4663 4581 op_cost(0);
4664 4582 format %{ %}
4665 4583 interface(CONST_INTER);
4666 4584 %}
4667 4585
4668 4586 operand immI_1_31() %{
4669 4587 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4670 4588 match(ConI);
4671 4589
4672 4590 op_cost(0);
4673 4591 format %{ %}
4674 4592 interface(CONST_INTER);
4675 4593 %}
4676 4594
4677 4595 operand immI_32_63() %{
4678 4596 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4679 4597 match(ConI);
4680 4598 op_cost(0);
4681 4599
4682 4600 format %{ %}
4683 4601 interface(CONST_INTER);
4684 4602 %}
4685 4603
4686 4604 operand immI_1() %{
4687 4605 predicate( n->get_int() == 1 );
4688 4606 match(ConI);
4689 4607
4690 4608 op_cost(0);
4691 4609 format %{ %}
4692 4610 interface(CONST_INTER);
4693 4611 %}
4694 4612
4695 4613 operand immI_2() %{
4696 4614 predicate( n->get_int() == 2 );
4697 4615 match(ConI);
4698 4616
4699 4617 op_cost(0);
4700 4618 format %{ %}
4701 4619 interface(CONST_INTER);
4702 4620 %}
4703 4621
4704 4622 operand immI_3() %{
4705 4623 predicate( n->get_int() == 3 );
4706 4624 match(ConI);
4707 4625
4708 4626 op_cost(0);
4709 4627 format %{ %}
4710 4628 interface(CONST_INTER);
4711 4629 %}
4712 4630
4713 4631 // Pointer Immediate
4714 4632 operand immP() %{
4715 4633 match(ConP);
4716 4634
4717 4635 op_cost(10);
4718 4636 format %{ %}
4719 4637 interface(CONST_INTER);
4720 4638 %}
4721 4639
4722 4640 // NULL Pointer Immediate
4723 4641 operand immP0() %{
4724 4642 predicate( n->get_ptr() == 0 );
4725 4643 match(ConP);
4726 4644 op_cost(0);
4727 4645
4728 4646 format %{ %}
4729 4647 interface(CONST_INTER);
4730 4648 %}
4731 4649
4732 4650 // Long Immediate
4733 4651 operand immL() %{
4734 4652 match(ConL);
4735 4653
4736 4654 op_cost(20);
4737 4655 format %{ %}
4738 4656 interface(CONST_INTER);
4739 4657 %}
4740 4658
4741 4659 // Long Immediate zero
4742 4660 operand immL0() %{
4743 4661 predicate( n->get_long() == 0L );
4744 4662 match(ConL);
4745 4663 op_cost(0);
4746 4664
4747 4665 format %{ %}
4748 4666 interface(CONST_INTER);
4749 4667 %}
4750 4668
4751 4669 // Long Immediate minus one
4752 4670 operand immL_M1() %{
4753 4671 predicate( n->get_long() == -1L );
4754 4672 match(ConL);
4755 4673 op_cost(0);
4756 4674
4757 4675 format %{ %}
4758 4676 interface(CONST_INTER);
4759 4677 %}
4760 4678
4761 4679 // Long immediate from 0 to 127.
4762 4680 // Used for a shorter form of long mul by 10.
4763 4681 operand immL_127() %{
4764 4682 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4765 4683 match(ConL);
4766 4684 op_cost(0);
4767 4685
4768 4686 format %{ %}
4769 4687 interface(CONST_INTER);
4770 4688 %}
4771 4689
4772 4690 // Long Immediate: low 32-bit mask
4773 4691 operand immL_32bits() %{
4774 4692 predicate(n->get_long() == 0xFFFFFFFFL);
4775 4693 match(ConL);
4776 4694 op_cost(0);
4777 4695
4778 4696 format %{ %}
4779 4697 interface(CONST_INTER);
4780 4698 %}
4781 4699
4782 4700 // Long Immediate: value fits in signed 32 bits
4783 4701 operand immL32() %{
4784 4702 predicate(n->get_long() == (int)(n->get_long()));
4785 4703 match(ConL);
4786 4704 op_cost(20);
4787 4705
4788 4706 format %{ %}
4789 4707 interface(CONST_INTER);
4790 4708 %}
4791 4709
4792 4710 // Double Immediate zero
4793 4711 operand immD0() %{
↓ open down ↓ |
2684 lines elided |
↑ open up ↑ |
4794 4712 // Do additional (and counter-intuitive) test against NaN to work around VC++
4795 4713 // bug that generates code such that NaNs compare equal to 0.0
4796 4714 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4797 4715 match(ConD);
4798 4716
4799 4717 op_cost(5);
4800 4718 format %{ %}
4801 4719 interface(CONST_INTER);
4802 4720 %}
4803 4721
4804 -// Double Immediate
4722 +// Double Immediate one
4805 4723 operand immD1() %{
4806 4724 predicate( UseSSE<=1 && n->getd() == 1.0 );
4807 4725 match(ConD);
4808 4726
4809 4727 op_cost(5);
4810 4728 format %{ %}
4811 4729 interface(CONST_INTER);
4812 4730 %}
4813 4731
4814 4732 // Double Immediate
4815 4733 operand immD() %{
4816 4734 predicate(UseSSE<=1);
4817 4735 match(ConD);
4818 4736
4819 4737 op_cost(5);
4820 4738 format %{ %}
4821 4739 interface(CONST_INTER);
4822 4740 %}
4823 4741
4824 4742 operand immXD() %{
4825 4743 predicate(UseSSE>=2);
4826 4744 match(ConD);
4827 4745
4828 4746 op_cost(5);
4829 4747 format %{ %}
4830 4748 interface(CONST_INTER);
4831 4749 %}
4832 4750
4833 4751 // Double Immediate zero
4834 4752 operand immXD0() %{
4835 4753 // Do additional (and counter-intuitive) test against NaN to work around VC++
4836 4754 // bug that generates code such that NaNs compare equal to 0.0 AND do not
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
4837 4755 // compare equal to -0.0.
4838 4756 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4839 4757 match(ConD);
4840 4758
4841 4759 format %{ %}
4842 4760 interface(CONST_INTER);
4843 4761 %}
4844 4762
4845 4763 // Float Immediate zero
4846 4764 operand immF0() %{
4847 - predicate( UseSSE == 0 && n->getf() == 0.0 );
4765 + predicate(UseSSE == 0 && n->getf() == 0.0F);
4766 + match(ConF);
4767 +
4768 + op_cost(5);
4769 + format %{ %}
4770 + interface(CONST_INTER);
4771 +%}
4772 +
4773 +// Float Immediate one
4774 +operand immF1() %{
4775 + predicate(UseSSE == 0 && n->getf() == 1.0F);
4848 4776 match(ConF);
4849 4777
4850 4778 op_cost(5);
4851 4779 format %{ %}
4852 4780 interface(CONST_INTER);
4853 4781 %}
4854 4782
4855 4783 // Float Immediate
4856 4784 operand immF() %{
4857 4785 predicate( UseSSE == 0 );
4858 4786 match(ConF);
4859 4787
4860 4788 op_cost(5);
4861 4789 format %{ %}
4862 4790 interface(CONST_INTER);
4863 4791 %}
4864 4792
4865 4793 // Float Immediate
4866 4794 operand immXF() %{
4867 4795 predicate(UseSSE >= 1);
4868 4796 match(ConF);
4869 4797
4870 4798 op_cost(5);
4871 4799 format %{ %}
4872 4800 interface(CONST_INTER);
4873 4801 %}
4874 4802
4875 4803 // Float Immediate zero. Zero and not -0.0
4876 4804 operand immXF0() %{
4877 4805 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4878 4806 match(ConF);
4879 4807
4880 4808 op_cost(5);
4881 4809 format %{ %}
4882 4810 interface(CONST_INTER);
4883 4811 %}
4884 4812
4885 4813 // Immediates for special shifts (sign extend)
4886 4814
4887 4815 // Constants for increment
4888 4816 operand immI_16() %{
4889 4817 predicate( n->get_int() == 16 );
4890 4818 match(ConI);
4891 4819
4892 4820 format %{ %}
4893 4821 interface(CONST_INTER);
4894 4822 %}
4895 4823
4896 4824 operand immI_24() %{
4897 4825 predicate( n->get_int() == 24 );
4898 4826 match(ConI);
4899 4827
4900 4828 format %{ %}
4901 4829 interface(CONST_INTER);
4902 4830 %}
4903 4831
4904 4832 // Constant for byte-wide masking
4905 4833 operand immI_255() %{
4906 4834 predicate( n->get_int() == 255 );
4907 4835 match(ConI);
4908 4836
4909 4837 format %{ %}
4910 4838 interface(CONST_INTER);
4911 4839 %}
4912 4840
4913 4841 // Constant for short-wide masking
4914 4842 operand immI_65535() %{
4915 4843 predicate(n->get_int() == 65535);
4916 4844 match(ConI);
4917 4845
4918 4846 format %{ %}
4919 4847 interface(CONST_INTER);
4920 4848 %}
4921 4849
4922 4850 // Register Operands
4923 4851 // Integer Register
4924 4852 operand eRegI() %{
4925 4853 constraint(ALLOC_IN_RC(e_reg));
4926 4854 match(RegI);
4927 4855 match(xRegI);
4928 4856 match(eAXRegI);
4929 4857 match(eBXRegI);
4930 4858 match(eCXRegI);
4931 4859 match(eDXRegI);
4932 4860 match(eDIRegI);
4933 4861 match(eSIRegI);
4934 4862
4935 4863 format %{ %}
4936 4864 interface(REG_INTER);
4937 4865 %}
4938 4866
4939 4867 // Subset of Integer Register
4940 4868 operand xRegI(eRegI reg) %{
4941 4869 constraint(ALLOC_IN_RC(x_reg));
4942 4870 match(reg);
4943 4871 match(eAXRegI);
4944 4872 match(eBXRegI);
4945 4873 match(eCXRegI);
4946 4874 match(eDXRegI);
4947 4875
4948 4876 format %{ %}
4949 4877 interface(REG_INTER);
4950 4878 %}
4951 4879
4952 4880 // Special Registers
4953 4881 operand eAXRegI(xRegI reg) %{
4954 4882 constraint(ALLOC_IN_RC(eax_reg));
4955 4883 match(reg);
4956 4884 match(eRegI);
4957 4885
4958 4886 format %{ "EAX" %}
4959 4887 interface(REG_INTER);
4960 4888 %}
4961 4889
4962 4890 // Special Registers
4963 4891 operand eBXRegI(xRegI reg) %{
4964 4892 constraint(ALLOC_IN_RC(ebx_reg));
4965 4893 match(reg);
4966 4894 match(eRegI);
4967 4895
4968 4896 format %{ "EBX" %}
4969 4897 interface(REG_INTER);
4970 4898 %}
4971 4899
4972 4900 operand eCXRegI(xRegI reg) %{
4973 4901 constraint(ALLOC_IN_RC(ecx_reg));
4974 4902 match(reg);
4975 4903 match(eRegI);
4976 4904
4977 4905 format %{ "ECX" %}
4978 4906 interface(REG_INTER);
4979 4907 %}
4980 4908
4981 4909 operand eDXRegI(xRegI reg) %{
4982 4910 constraint(ALLOC_IN_RC(edx_reg));
4983 4911 match(reg);
4984 4912 match(eRegI);
4985 4913
4986 4914 format %{ "EDX" %}
4987 4915 interface(REG_INTER);
4988 4916 %}
4989 4917
4990 4918 operand eDIRegI(xRegI reg) %{
4991 4919 constraint(ALLOC_IN_RC(edi_reg));
4992 4920 match(reg);
4993 4921 match(eRegI);
4994 4922
4995 4923 format %{ "EDI" %}
4996 4924 interface(REG_INTER);
4997 4925 %}
4998 4926
4999 4927 operand naxRegI() %{
5000 4928 constraint(ALLOC_IN_RC(nax_reg));
5001 4929 match(RegI);
5002 4930 match(eCXRegI);
5003 4931 match(eDXRegI);
5004 4932 match(eSIRegI);
5005 4933 match(eDIRegI);
5006 4934
5007 4935 format %{ %}
5008 4936 interface(REG_INTER);
5009 4937 %}
5010 4938
5011 4939 operand nadxRegI() %{
5012 4940 constraint(ALLOC_IN_RC(nadx_reg));
5013 4941 match(RegI);
5014 4942 match(eBXRegI);
5015 4943 match(eCXRegI);
5016 4944 match(eSIRegI);
5017 4945 match(eDIRegI);
5018 4946
5019 4947 format %{ %}
5020 4948 interface(REG_INTER);
5021 4949 %}
5022 4950
5023 4951 operand ncxRegI() %{
5024 4952 constraint(ALLOC_IN_RC(ncx_reg));
5025 4953 match(RegI);
5026 4954 match(eAXRegI);
5027 4955 match(eDXRegI);
5028 4956 match(eSIRegI);
5029 4957 match(eDIRegI);
5030 4958
5031 4959 format %{ %}
5032 4960 interface(REG_INTER);
5033 4961 %}
5034 4962
5035 4963 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
5036 4964 // //
5037 4965 operand eSIRegI(xRegI reg) %{
5038 4966 constraint(ALLOC_IN_RC(esi_reg));
5039 4967 match(reg);
5040 4968 match(eRegI);
5041 4969
5042 4970 format %{ "ESI" %}
5043 4971 interface(REG_INTER);
5044 4972 %}
5045 4973
5046 4974 // Pointer Register
5047 4975 operand anyRegP() %{
5048 4976 constraint(ALLOC_IN_RC(any_reg));
5049 4977 match(RegP);
5050 4978 match(eAXRegP);
5051 4979 match(eBXRegP);
5052 4980 match(eCXRegP);
5053 4981 match(eDIRegP);
5054 4982 match(eRegP);
5055 4983
5056 4984 format %{ %}
5057 4985 interface(REG_INTER);
5058 4986 %}
5059 4987
5060 4988 operand eRegP() %{
5061 4989 constraint(ALLOC_IN_RC(e_reg));
5062 4990 match(RegP);
5063 4991 match(eAXRegP);
5064 4992 match(eBXRegP);
5065 4993 match(eCXRegP);
5066 4994 match(eDIRegP);
5067 4995
5068 4996 format %{ %}
5069 4997 interface(REG_INTER);
5070 4998 %}
5071 4999
5072 5000 // On windows95, EBP is not safe to use for implicit null tests.
5073 5001 operand eRegP_no_EBP() %{
5074 5002 constraint(ALLOC_IN_RC(e_reg_no_rbp));
5075 5003 match(RegP);
5076 5004 match(eAXRegP);
5077 5005 match(eBXRegP);
5078 5006 match(eCXRegP);
5079 5007 match(eDIRegP);
5080 5008
5081 5009 op_cost(100);
5082 5010 format %{ %}
5083 5011 interface(REG_INTER);
5084 5012 %}
5085 5013
5086 5014 operand naxRegP() %{
5087 5015 constraint(ALLOC_IN_RC(nax_reg));
5088 5016 match(RegP);
5089 5017 match(eBXRegP);
5090 5018 match(eDXRegP);
5091 5019 match(eCXRegP);
5092 5020 match(eSIRegP);
5093 5021 match(eDIRegP);
5094 5022
5095 5023 format %{ %}
5096 5024 interface(REG_INTER);
5097 5025 %}
5098 5026
5099 5027 operand nabxRegP() %{
5100 5028 constraint(ALLOC_IN_RC(nabx_reg));
5101 5029 match(RegP);
5102 5030 match(eCXRegP);
5103 5031 match(eDXRegP);
5104 5032 match(eSIRegP);
5105 5033 match(eDIRegP);
5106 5034
5107 5035 format %{ %}
5108 5036 interface(REG_INTER);
5109 5037 %}
5110 5038
5111 5039 operand pRegP() %{
5112 5040 constraint(ALLOC_IN_RC(p_reg));
5113 5041 match(RegP);
5114 5042 match(eBXRegP);
5115 5043 match(eDXRegP);
5116 5044 match(eSIRegP);
5117 5045 match(eDIRegP);
5118 5046
5119 5047 format %{ %}
5120 5048 interface(REG_INTER);
5121 5049 %}
5122 5050
5123 5051 // Special Registers
5124 5052 // Return a pointer value
5125 5053 operand eAXRegP(eRegP reg) %{
5126 5054 constraint(ALLOC_IN_RC(eax_reg));
5127 5055 match(reg);
5128 5056 format %{ "EAX" %}
5129 5057 interface(REG_INTER);
5130 5058 %}
5131 5059
5132 5060 // Used in AtomicAdd
5133 5061 operand eBXRegP(eRegP reg) %{
5134 5062 constraint(ALLOC_IN_RC(ebx_reg));
5135 5063 match(reg);
5136 5064 format %{ "EBX" %}
5137 5065 interface(REG_INTER);
5138 5066 %}
5139 5067
5140 5068 // Tail-call (interprocedural jump) to interpreter
5141 5069 operand eCXRegP(eRegP reg) %{
5142 5070 constraint(ALLOC_IN_RC(ecx_reg));
5143 5071 match(reg);
5144 5072 format %{ "ECX" %}
5145 5073 interface(REG_INTER);
5146 5074 %}
5147 5075
5148 5076 operand eSIRegP(eRegP reg) %{
5149 5077 constraint(ALLOC_IN_RC(esi_reg));
5150 5078 match(reg);
5151 5079 format %{ "ESI" %}
5152 5080 interface(REG_INTER);
5153 5081 %}
5154 5082
5155 5083 // Used in rep stosw
5156 5084 operand eDIRegP(eRegP reg) %{
5157 5085 constraint(ALLOC_IN_RC(edi_reg));
5158 5086 match(reg);
5159 5087 format %{ "EDI" %}
5160 5088 interface(REG_INTER);
5161 5089 %}
5162 5090
5163 5091 operand eBPRegP() %{
5164 5092 constraint(ALLOC_IN_RC(ebp_reg));
5165 5093 match(RegP);
5166 5094 format %{ "EBP" %}
5167 5095 interface(REG_INTER);
5168 5096 %}
5169 5097
5170 5098 operand eRegL() %{
5171 5099 constraint(ALLOC_IN_RC(long_reg));
5172 5100 match(RegL);
5173 5101 match(eADXRegL);
5174 5102
5175 5103 format %{ %}
5176 5104 interface(REG_INTER);
5177 5105 %}
5178 5106
5179 5107 operand eADXRegL( eRegL reg ) %{
5180 5108 constraint(ALLOC_IN_RC(eadx_reg));
5181 5109 match(reg);
5182 5110
5183 5111 format %{ "EDX:EAX" %}
5184 5112 interface(REG_INTER);
5185 5113 %}
5186 5114
5187 5115 operand eBCXRegL( eRegL reg ) %{
5188 5116 constraint(ALLOC_IN_RC(ebcx_reg));
5189 5117 match(reg);
5190 5118
5191 5119 format %{ "EBX:ECX" %}
5192 5120 interface(REG_INTER);
5193 5121 %}
5194 5122
5195 5123 // Special case for integer high multiply
5196 5124 operand eADXRegL_low_only() %{
5197 5125 constraint(ALLOC_IN_RC(eadx_reg));
5198 5126 match(RegL);
5199 5127
5200 5128 format %{ "EAX" %}
5201 5129 interface(REG_INTER);
5202 5130 %}
5203 5131
5204 5132 // Flags register, used as output of compare instructions
5205 5133 operand eFlagsReg() %{
5206 5134 constraint(ALLOC_IN_RC(int_flags));
5207 5135 match(RegFlags);
5208 5136
5209 5137 format %{ "EFLAGS" %}
5210 5138 interface(REG_INTER);
5211 5139 %}
5212 5140
5213 5141 // Flags register, used as output of FLOATING POINT compare instructions
5214 5142 operand eFlagsRegU() %{
5215 5143 constraint(ALLOC_IN_RC(int_flags));
5216 5144 match(RegFlags);
5217 5145
5218 5146 format %{ "EFLAGS_U" %}
5219 5147 interface(REG_INTER);
5220 5148 %}
5221 5149
5222 5150 operand eFlagsRegUCF() %{
5223 5151 constraint(ALLOC_IN_RC(int_flags));
5224 5152 match(RegFlags);
5225 5153 predicate(false);
5226 5154
5227 5155 format %{ "EFLAGS_U_CF" %}
5228 5156 interface(REG_INTER);
5229 5157 %}
5230 5158
5231 5159 // Condition Code Register used by long compare
5232 5160 operand flagsReg_long_LTGE() %{
5233 5161 constraint(ALLOC_IN_RC(int_flags));
5234 5162 match(RegFlags);
5235 5163 format %{ "FLAGS_LTGE" %}
5236 5164 interface(REG_INTER);
5237 5165 %}
5238 5166 operand flagsReg_long_EQNE() %{
5239 5167 constraint(ALLOC_IN_RC(int_flags));
5240 5168 match(RegFlags);
5241 5169 format %{ "FLAGS_EQNE" %}
5242 5170 interface(REG_INTER);
5243 5171 %}
5244 5172 operand flagsReg_long_LEGT() %{
5245 5173 constraint(ALLOC_IN_RC(int_flags));
5246 5174 match(RegFlags);
5247 5175 format %{ "FLAGS_LEGT" %}
5248 5176 interface(REG_INTER);
5249 5177 %}
5250 5178
5251 5179 // Float register operands
5252 5180 operand regD() %{
5253 5181 predicate( UseSSE < 2 );
5254 5182 constraint(ALLOC_IN_RC(dbl_reg));
5255 5183 match(RegD);
5256 5184 match(regDPR1);
5257 5185 match(regDPR2);
5258 5186 format %{ %}
5259 5187 interface(REG_INTER);
5260 5188 %}
5261 5189
5262 5190 operand regDPR1(regD reg) %{
5263 5191 predicate( UseSSE < 2 );
5264 5192 constraint(ALLOC_IN_RC(dbl_reg0));
5265 5193 match(reg);
5266 5194 format %{ "FPR1" %}
5267 5195 interface(REG_INTER);
5268 5196 %}
5269 5197
5270 5198 operand regDPR2(regD reg) %{
5271 5199 predicate( UseSSE < 2 );
5272 5200 constraint(ALLOC_IN_RC(dbl_reg1));
5273 5201 match(reg);
5274 5202 format %{ "FPR2" %}
5275 5203 interface(REG_INTER);
5276 5204 %}
5277 5205
5278 5206 operand regnotDPR1(regD reg) %{
5279 5207 predicate( UseSSE < 2 );
5280 5208 constraint(ALLOC_IN_RC(dbl_notreg0));
5281 5209 match(reg);
5282 5210 format %{ %}
5283 5211 interface(REG_INTER);
5284 5212 %}
5285 5213
5286 5214 // XMM Double register operands
5287 5215 operand regXD() %{
5288 5216 predicate( UseSSE>=2 );
5289 5217 constraint(ALLOC_IN_RC(xdb_reg));
5290 5218 match(RegD);
5291 5219 match(regXD6);
5292 5220 match(regXD7);
5293 5221 format %{ %}
5294 5222 interface(REG_INTER);
5295 5223 %}
5296 5224
5297 5225 // XMM6 double register operands
5298 5226 operand regXD6(regXD reg) %{
5299 5227 predicate( UseSSE>=2 );
5300 5228 constraint(ALLOC_IN_RC(xdb_reg6));
5301 5229 match(reg);
5302 5230 format %{ "XMM6" %}
5303 5231 interface(REG_INTER);
5304 5232 %}
5305 5233
5306 5234 // XMM7 double register operands
5307 5235 operand regXD7(regXD reg) %{
5308 5236 predicate( UseSSE>=2 );
5309 5237 constraint(ALLOC_IN_RC(xdb_reg7));
5310 5238 match(reg);
5311 5239 format %{ "XMM7" %}
5312 5240 interface(REG_INTER);
5313 5241 %}
5314 5242
5315 5243 // Float register operands
5316 5244 operand regF() %{
5317 5245 predicate( UseSSE < 2 );
5318 5246 constraint(ALLOC_IN_RC(flt_reg));
5319 5247 match(RegF);
5320 5248 match(regFPR1);
5321 5249 format %{ %}
5322 5250 interface(REG_INTER);
5323 5251 %}
5324 5252
5325 5253 // Float register operands
5326 5254 operand regFPR1(regF reg) %{
5327 5255 predicate( UseSSE < 2 );
5328 5256 constraint(ALLOC_IN_RC(flt_reg0));
5329 5257 match(reg);
5330 5258 format %{ "FPR1" %}
5331 5259 interface(REG_INTER);
5332 5260 %}
5333 5261
5334 5262 // XMM register operands
5335 5263 operand regX() %{
5336 5264 predicate( UseSSE>=1 );
5337 5265 constraint(ALLOC_IN_RC(xmm_reg));
5338 5266 match(RegF);
5339 5267 format %{ %}
5340 5268 interface(REG_INTER);
5341 5269 %}
5342 5270
5343 5271
5344 5272 //----------Memory Operands----------------------------------------------------
5345 5273 // Direct Memory Operand
5346 5274 operand direct(immP addr) %{
5347 5275 match(addr);
5348 5276
5349 5277 format %{ "[$addr]" %}
5350 5278 interface(MEMORY_INTER) %{
5351 5279 base(0xFFFFFFFF);
5352 5280 index(0x4);
5353 5281 scale(0x0);
5354 5282 disp($addr);
5355 5283 %}
5356 5284 %}
5357 5285
5358 5286 // Indirect Memory Operand
5359 5287 operand indirect(eRegP reg) %{
5360 5288 constraint(ALLOC_IN_RC(e_reg));
5361 5289 match(reg);
5362 5290
5363 5291 format %{ "[$reg]" %}
5364 5292 interface(MEMORY_INTER) %{
5365 5293 base($reg);
5366 5294 index(0x4);
5367 5295 scale(0x0);
5368 5296 disp(0x0);
5369 5297 %}
5370 5298 %}
5371 5299
5372 5300 // Indirect Memory Plus Short Offset Operand
5373 5301 operand indOffset8(eRegP reg, immI8 off) %{
5374 5302 match(AddP reg off);
5375 5303
5376 5304 format %{ "[$reg + $off]" %}
5377 5305 interface(MEMORY_INTER) %{
5378 5306 base($reg);
5379 5307 index(0x4);
5380 5308 scale(0x0);
5381 5309 disp($off);
5382 5310 %}
5383 5311 %}
5384 5312
5385 5313 // Indirect Memory Plus Long Offset Operand
5386 5314 operand indOffset32(eRegP reg, immI off) %{
5387 5315 match(AddP reg off);
5388 5316
5389 5317 format %{ "[$reg + $off]" %}
5390 5318 interface(MEMORY_INTER) %{
5391 5319 base($reg);
5392 5320 index(0x4);
5393 5321 scale(0x0);
5394 5322 disp($off);
5395 5323 %}
5396 5324 %}
5397 5325
5398 5326 // Indirect Memory Plus Long Offset Operand
5399 5327 operand indOffset32X(eRegI reg, immP off) %{
5400 5328 match(AddP off reg);
5401 5329
5402 5330 format %{ "[$reg + $off]" %}
5403 5331 interface(MEMORY_INTER) %{
5404 5332 base($reg);
5405 5333 index(0x4);
5406 5334 scale(0x0);
5407 5335 disp($off);
5408 5336 %}
5409 5337 %}
5410 5338
5411 5339 // Indirect Memory Plus Index Register Plus Offset Operand
5412 5340 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5413 5341 match(AddP (AddP reg ireg) off);
5414 5342
5415 5343 op_cost(10);
5416 5344 format %{"[$reg + $off + $ireg]" %}
5417 5345 interface(MEMORY_INTER) %{
5418 5346 base($reg);
5419 5347 index($ireg);
5420 5348 scale(0x0);
5421 5349 disp($off);
5422 5350 %}
5423 5351 %}
5424 5352
5425 5353 // Indirect Memory Plus Index Register Plus Offset Operand
5426 5354 operand indIndex(eRegP reg, eRegI ireg) %{
5427 5355 match(AddP reg ireg);
5428 5356
5429 5357 op_cost(10);
5430 5358 format %{"[$reg + $ireg]" %}
5431 5359 interface(MEMORY_INTER) %{
5432 5360 base($reg);
5433 5361 index($ireg);
5434 5362 scale(0x0);
5435 5363 disp(0x0);
5436 5364 %}
5437 5365 %}
5438 5366
5439 5367 // // -------------------------------------------------------------------------
5440 5368 // // 486 architecture doesn't support "scale * index + offset" without a base
5441 5369 // // -------------------------------------------------------------------------
5442 5370 // // Scaled Memory Operands
5443 5371 // // Indirect Memory Times Scale Plus Offset Operand
5444 5372 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5445 5373 // match(AddP off (LShiftI ireg scale));
5446 5374 //
5447 5375 // op_cost(10);
5448 5376 // format %{"[$off + $ireg << $scale]" %}
5449 5377 // interface(MEMORY_INTER) %{
5450 5378 // base(0x4);
5451 5379 // index($ireg);
5452 5380 // scale($scale);
5453 5381 // disp($off);
5454 5382 // %}
5455 5383 // %}
5456 5384
5457 5385 // Indirect Memory Times Scale Plus Index Register
5458 5386 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5459 5387 match(AddP reg (LShiftI ireg scale));
5460 5388
5461 5389 op_cost(10);
5462 5390 format %{"[$reg + $ireg << $scale]" %}
5463 5391 interface(MEMORY_INTER) %{
5464 5392 base($reg);
5465 5393 index($ireg);
5466 5394 scale($scale);
5467 5395 disp(0x0);
5468 5396 %}
5469 5397 %}
5470 5398
5471 5399 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5472 5400 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5473 5401 match(AddP (AddP reg (LShiftI ireg scale)) off);
5474 5402
5475 5403 op_cost(10);
5476 5404 format %{"[$reg + $off + $ireg << $scale]" %}
5477 5405 interface(MEMORY_INTER) %{
5478 5406 base($reg);
5479 5407 index($ireg);
5480 5408 scale($scale);
5481 5409 disp($off);
5482 5410 %}
5483 5411 %}
5484 5412
5485 5413 //----------Load Long Memory Operands------------------------------------------
5486 5414 // The load-long idiom will use its address expression again after loading
5487 5415 // the first word of the long. If the load-long destination overlaps with
5488 5416 // registers used in the addressing expression, the 2nd half will be loaded
5489 5417 // from a clobbered address. Fix this by requiring that load-long use
5490 5418 // address registers that do not overlap with the load-long target.
5491 5419
5492 5420 // load-long support
// Base-pointer operand restricted to ESI (ALLOC_IN_RC(esi_reg)) so the
// address register cannot overlap the load-long destination pair; see the
// clobbering discussion in the section comment above.
5493 5421 operand load_long_RegP() %{
5494 5422 constraint(ALLOC_IN_RC(esi_reg));
5495 5423 match(RegP);
5496 5424 match(eSIRegP);
5497 5425 op_cost(100);
5498 5426 format %{ %}
5499 5427 interface(REG_INTER);
5500 5428 %}
5501 5429
5502 5430 // Indirect Memory Operand Long
// Plain [reg] form of the load-long address; reg is pinned to ESI.
// index(0x4) is the "no index register" encoding used throughout this file.
5503 5431 operand load_long_indirect(load_long_RegP reg) %{
5504 5432 constraint(ALLOC_IN_RC(esi_reg));
5505 5433 match(reg);
5506 5434
5507 5435 format %{ "[$reg]" %}
5508 5436 interface(MEMORY_INTER) %{
5509 5437 base($reg);
5510 5438 index(0x4);
5511 5439 scale(0x0);
5512 5440 disp(0x0);
5513 5441 %}
5514 5442 %}
5515 5443
5516 5444 // Indirect Memory Plus Long Offset Operand
// [reg + off] form of the load-long address; reg is still pinned to ESI.
5517 5445 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5518 5446 match(AddP reg off);
5519 5447
5520 5448 format %{ "[$reg + $off]" %}
5521 5449 interface(MEMORY_INTER) %{
5522 5450 base($reg);
5523 5451 index(0x4);
5524 5452 scale(0x0);
5525 5453 disp($off);
5526 5454 %}
5527 5455 %}
5528 5456
// Address forms legal for the two-instruction load-long idiom above.
5529 5457 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5530 5458
5531 5459
5532 5460 //----------Special Memory Operands--------------------------------------------
5533 5461 // Stack Slot Operand - This operand is used for loading and storing temporary
5534 5462 // values on the stack where a match requires a value to
5535 5463 // flow through memory.
// Pointer-sized stack slot addressed as [ESP + disp]; matcher-internal only.
5536 5464 operand stackSlotP(sRegP reg) %{
5537 5465 constraint(ALLOC_IN_RC(stack_slots));
5538 5466 // No match rule because this operand is only generated in matching
5539 5467 format %{ "[$reg]" %}
5540 5468 interface(MEMORY_INTER) %{
5541 5469 base(0x4); // ESP
5542 5470 index(0x4); // No Index
5543 5471 scale(0x0); // No Scale
5544 5472 disp($reg); // Stack Offset
5545 5473 %}
5546 5474 %}
5547 5475
// Integer stack slot addressed as [ESP + disp]; matcher-internal only.
5548 5476 operand stackSlotI(sRegI reg) %{
5549 5477 constraint(ALLOC_IN_RC(stack_slots));
5550 5478 // No match rule because this operand is only generated in matching
5551 5479 format %{ "[$reg]" %}
5552 5480 interface(MEMORY_INTER) %{
5553 5481 base(0x4); // ESP
5554 5482 index(0x4); // No Index
5555 5483 scale(0x0); // No Scale
5556 5484 disp($reg); // Stack Offset
5557 5485 %}
5558 5486 %}
5559 5487
// Float stack slot addressed as [ESP + disp]; matcher-internal only.
5560 5488 operand stackSlotF(sRegF reg) %{
5561 5489 constraint(ALLOC_IN_RC(stack_slots));
5562 5490 // No match rule because this operand is only generated in matching
5563 5491 format %{ "[$reg]" %}
5564 5492 interface(MEMORY_INTER) %{
5565 5493 base(0x4); // ESP
5566 5494 index(0x4); // No Index
5567 5495 scale(0x0); // No Scale
5568 5496 disp($reg); // Stack Offset
5569 5497 %}
5570 5498 %}
5571 5499
// Double stack slot addressed as [ESP + disp]; matcher-internal only.
5572 5500 operand stackSlotD(sRegD reg) %{
5573 5501 constraint(ALLOC_IN_RC(stack_slots));
5574 5502 // No match rule because this operand is only generated in matching
5575 5503 format %{ "[$reg]" %}
5576 5504 interface(MEMORY_INTER) %{
5577 5505 base(0x4); // ESP
5578 5506 index(0x4); // No Index
5579 5507 scale(0x0); // No Scale
5580 5508 disp($reg); // Stack Offset
5581 5509 %}
5582 5510 %}
5583 5511
// Long stack slot addressed as [ESP + disp]; matcher-internal only.
5584 5512 operand stackSlotL(sRegL reg) %{
5585 5513 constraint(ALLOC_IN_RC(stack_slots));
5586 5514 // No match rule because this operand is only generated in matching
5587 5515 format %{ "[$reg]" %}
5588 5516 interface(MEMORY_INTER) %{
5589 5517 base(0x4); // ESP
5590 5518 index(0x4); // No Index
5591 5519 scale(0x0); // No Scale
5592 5520 disp($reg); // Stack Offset
5593 5521 %}
5594 5522 %}
5595 5523
5596 5524 //----------Memory Operands - Win95 Implicit Null Variants----------------
5597 5525 // Indirect Memory Operand
// Win95 implicit-null-check variant: base register type eRegP_no_EBP
// excludes EBP (see the section header above).
5598 5526 operand indirect_win95_safe(eRegP_no_EBP reg)
5599 5527 %{
5600 5528 constraint(ALLOC_IN_RC(e_reg));
5601 5529 match(reg);
5602 5530
5603 5531 op_cost(100);
5604 5532 format %{ "[$reg]" %}
5605 5533 interface(MEMORY_INTER) %{
5606 5534 base($reg);
5607 5535 index(0x4);
5608 5536 scale(0x0);
5609 5537 disp(0x0);
5610 5538 %}
5611 5539 %}
5612 5540
5613 5541 // Indirect Memory Plus Short Offset Operand
// Win95-safe variant of indOffset8: base excludes EBP (eRegP_no_EBP).
5614 5542 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5615 5543 %{
5616 5544 match(AddP reg off);
5617 5545
5618 5546 op_cost(100);
5619 5547 format %{ "[$reg + $off]" %}
5620 5548 interface(MEMORY_INTER) %{
5621 5549 base($reg);
5622 5550 index(0x4);
5623 5551 scale(0x0);
5624 5552 disp($off);
5625 5553 %}
5626 5554 %}
5627 5555
5628 5556 // Indirect Memory Plus Long Offset Operand
// Win95-safe variant of indOffset32: base excludes EBP (eRegP_no_EBP).
5629 5557 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5630 5558 %{
5631 5559 match(AddP reg off);
5632 5560
5633 5561 op_cost(100);
5634 5562 format %{ "[$reg + $off]" %}
5635 5563 interface(MEMORY_INTER) %{
5636 5564 base($reg);
5637 5565 index(0x4);
5638 5566 scale(0x0);
5639 5567 disp($off);
5640 5568 %}
5641 5569 %}
5642 5570
5643 5571 // Indirect Memory Plus Index Register Plus Offset Operand
// Win95-safe variant of indIndexOffset: base excludes EBP (eRegP_no_EBP).
5644 5572 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5645 5573 %{
5646 5574 match(AddP (AddP reg ireg) off);
5647 5575
5648 5576 op_cost(100);
5649 5577 format %{"[$reg + $off + $ireg]" %}
5650 5578 interface(MEMORY_INTER) %{
5651 5579 base($reg);
5652 5580 index($ireg);
5653 5581 scale(0x0);
5654 5582 disp($off);
5655 5583 %}
5656 5584 %}
5657 5585
5658 5586 // Indirect Memory Times Scale Plus Index Register
// Win95-safe variant of indIndexScale: base excludes EBP (eRegP_no_EBP).
5659 5587 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5660 5588 %{
5661 5589 match(AddP reg (LShiftI ireg scale));
5662 5590
5663 5591 op_cost(100);
5664 5592 format %{"[$reg + $ireg << $scale]" %}
5665 5593 interface(MEMORY_INTER) %{
5666 5594 base($reg);
5667 5595 index($ireg);
5668 5596 scale($scale);
5669 5597 disp(0x0);
5670 5598 %}
5671 5599 %}
5672 5600
5673 5601 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Win95-safe variant of indIndexScaleOffset: base excludes EBP (eRegP_no_EBP).
5674 5602 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5675 5603 %{
5676 5604 match(AddP (AddP reg (LShiftI ireg scale)) off);
5677 5605
5678 5606 op_cost(100);
5679 5607 format %{"[$reg + $off + $ireg << $scale]" %}
5680 5608 interface(MEMORY_INTER) %{
5681 5609 base($reg);
5682 5610 index($ireg);
5683 5611 scale($scale);
5684 5612 disp($off);
5685 5613 %}
5686 5614 %}
5687 5615
5688 5616 //----------Conditional Branch Operands----------------------------------------
5689 5617 // Comparison Op - This is the operation of the comparison, and is limited to
5690 5618 // the following set of codes:
5691 5619 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5692 5620 //
5693 5621 // Other attributes of the comparison, such as unsignedness, are specified
5694 5622 // by the comparison instruction that sets a condition code flags register.
5695 5623 // That result is represented by a flags operand whose subtype is appropriate
5696 5624 // to the unsignedness (etc.) of the comparison.
5697 5625 //
5698 5626 // Later, the instruction which matches both the Comparison Op (a Bool) and
5699 5627 // the flags (produced by the Cmp) specifies the coding of the comparison op
5700 5628 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5701 5629
5702 5630 // Comparison Code
// Signed comparison: each entry pairs a condition-code encoding with the
// jcc mnemonic suffix used for signed compares (l/ge/le/g).
5703 5631 operand cmpOp() %{
5704 5632 match(Bool);
5705 5633
5706 5634 format %{ "" %}
5707 5635 interface(COND_INTER) %{
5708 5636 equal(0x4, "e");
5709 5637 not_equal(0x5, "ne");
5710 5638 less(0xC, "l");
5711 5639 greater_equal(0xD, "ge");
5712 5640 less_equal(0xE, "le");
5713 5641 greater(0xF, "g");
5714 5642 %}
5715 5643 %}
5716 5644
5717 5645 // Comparison Code, unsigned compare. Used by FP also, with
5718 5646 // C2 (unordered) turned into GT or LT already. The other bits
5719 5647 // C0 and C3 are turned into Carry & Zero flags.
// Uses the unsigned jcc suffixes (b/nb/be/nbe) instead of the signed ones.
5720 5648 operand cmpOpU() %{
5721 5649 match(Bool);
5722 5650
5723 5651 format %{ "" %}
5724 5652 interface(COND_INTER) %{
5725 5653 equal(0x4, "e");
5726 5654 not_equal(0x5, "ne");
5727 5655 less(0x2, "b");
5728 5656 greater_equal(0x3, "nb");
5729 5657 less_equal(0x6, "be");
5730 5658 greater(0x7, "nbe");
5731 5659 %}
5732 5660 %}
5733 5661
5734 5662 // Floating comparisons that don't require any fixup for the unordered case
// Restricted by predicate to lt/ge/le/gt tests; uses the same unsigned
// encodings as cmpOpU.
5735 5663 operand cmpOpUCF() %{
5736 5664 match(Bool);
5737 5665 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5738 5666 n->as_Bool()->_test._test == BoolTest::ge ||
5739 5667 n->as_Bool()->_test._test == BoolTest::le ||
5740 5668 n->as_Bool()->_test._test == BoolTest::gt);
5741 5669 format %{ "" %}
5742 5670 interface(COND_INTER) %{
5743 5671 equal(0x4, "e");
5744 5672 not_equal(0x5, "ne");
5745 5673 less(0x2, "b");
5746 5674 greater_equal(0x3, "nb");
5747 5675 less_equal(0x6, "be");
5748 5676 greater(0x7, "nbe");
5749 5677 %}
5750 5678 %}
5751 5679
5752 5680
5753 5681 // Floating comparisons that can be fixed up with extra conditional jumps
// Restricted by predicate to eq/ne tests; same unsigned encodings as cmpOpU.
5754 5682 operand cmpOpUCF2() %{
5755 5683 match(Bool);
5756 5684 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5757 5685 n->as_Bool()->_test._test == BoolTest::eq);
5758 5686 format %{ "" %}
5759 5687 interface(COND_INTER) %{
5760 5688 equal(0x4, "e");
5761 5689 not_equal(0x5, "ne");
5762 5690 less(0x2, "b");
5763 5691 greater_equal(0x3, "nb");
5764 5692 less_equal(0x6, "be");
5765 5693 greater(0x7, "nbe");
5766 5694 %}
5767 5695 %}
5768 5696
5769 5697 // Comparison Code for FP conditional move
// NOTE(review): these wider hex values are FCMOV-specific encodings, not the
// jcc nibbles used by the operands above — confirm against the FCMOV encoder.
5770 5698 operand cmpOp_fcmov() %{
5771 5699 match(Bool);
5772 5700
5773 5701 format %{ "" %}
5774 5702 interface(COND_INTER) %{
5775 5703 equal (0x0C8);
5776 5704 not_equal (0x1C8);
5777 5705 less (0x0C0);
5778 5706 greater_equal(0x1C0);
5779 5707 less_equal (0x0D0);
5780 5708 greater (0x1D0);
5781 5709 %}
5782 5710 %}
5783 5711
5784 5712 // Comparison Code used in long compares
// The less/greater entries are deliberately the mirror image of cmpOp's
// (0xF/"g" for less, etc.): the comparison operands have been commuted,
// so the condition sense must be reversed.
5785 5713 operand cmpOp_commute() %{
5786 5714 match(Bool);
5787 5715
5788 5716 format %{ "" %}
5789 5717 interface(COND_INTER) %{
5790 5718 equal(0x4, "e");
5791 5719 not_equal(0x5, "ne");
5792 5720 less(0xF, "g");
5793 5721 greater_equal(0xE, "le");
5794 5722 less_equal(0xD, "ge");
5795 5723 greater(0xC, "l");
5796 5724 %}
5797 5725 %}
5798 5726
5799 5727 //----------OPERAND CLASSES----------------------------------------------------
5800 5728 // Operand Classes are groups of operands that are used to simplify
5801 5729 // instruction definitions by not requiring the AD writer to specify separate
5802 5730 // instructions for every form of operand when the instruction accepts
5803 5731 // multiple operand types with the same basic encoding and format. The classic
5804 5732 // case of this is memory operands.
5805 5733
// All general addressing modes accepted by ordinary memory instructions.
5806 5734 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5807 5735 indIndex, indIndexScale, indIndexScaleOffset);
5808 5736
5809 5737 // Long memory operations are encoded in 2 instructions and a +4 offset.
5810 5738 // This means some kind of offset is always required and you cannot use
5811 5739 // an oop as the offset (done when working on static globals).
// Note: identical to 'memory' above except that indOffset32X is excluded.
5812 5740 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5813 5741 indIndex, indIndexScale, indIndexScaleOffset);
5814 5742
5815 5743
5816 5744 //----------PIPELINE-----------------------------------------------------------
5817 5745 // Rules which define the behavior of the target architectures pipeline.
5818 5746 pipeline %{
5819 5747
5820 5748 //----------ATTRIBUTES---------------------------------------------------------
5821 5749 attributes %{
5822 5750 variable_size_instructions; // Instructions are variable length
5823 5751 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5824 5752 instruction_unit_size = 1; // An instruction is 1 byte long
5825 5753 instruction_fetch_unit_size = 16; // The processor fetches one line
5826 5754 instruction_fetch_units = 1; // of 16 bytes
5827 5755
5828 5756 // List of nop instructions
5829 5757 nops( MachNop );
5830 5758 %}
5831 5759
5832 5760 //----------RESOURCES----------------------------------------------------------
5833 5761 // Resources are the functional units available to the machine
5834 5762
5835 5763 // Generic P2/P3 pipeline
5836 5764 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5837 5765 // 3 instructions decoded per cycle.
5838 5766 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5839 5767 // 2 ALU op, only ALU0 handles mul/div instructions.
5840 5768 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5841 5769 MS0, MS1, MEM = MS0 | MS1,
5842 5770 BR, FPU,
5843 5771 ALU0, ALU1, ALU = ALU0 | ALU1 );
5844 5772
5845 5773 //----------PIPELINE DESCRIPTION-----------------------------------------------
5846 5774 // Pipeline Description specifies the stages in the machine's pipeline
5847 5775
5848 5776 // Generic P2/P3 pipeline
5849 5777 pipe_desc(S0, S1, S2, S3, S4, S5);
5850 5778
5851 5779 //----------PIPELINE CLASSES---------------------------------------------------
5852 5780 // Pipeline Classes describe the stages in which input and output are
5853 5781 // referenced by the hardware pipeline.
5854 5782
5855 5783 // Naming convention: ialu or fpu
5856 5784 // Then: _reg
5857 5785 // Then: _reg if there is a 2nd register
5858 5786 // Then: _long if it's a pair of instructions implementing a long
5859 5787 // Then: _fat if it requires the big decoder
5860 5788 // Or: _mem if it requires the big decoder and a memory unit.
5861 5789
5862 5790 // Integer ALU reg operation
5863 5791 pipe_class ialu_reg(eRegI dst) %{
5864 5792 single_instruction;
5865 5793 dst : S4(write);
5866 5794 dst : S3(read);
5867 5795 DECODE : S0; // any decoder
5868 5796 ALU : S3; // any alu
5869 5797 %}
5870 5798
5871 5799 // Long ALU reg operation
5872 5800 pipe_class ialu_reg_long(eRegL dst) %{
5873 5801 instruction_count(2);
5874 5802 dst : S4(write);
5875 5803 dst : S3(read);
5876 5804 DECODE : S0(2); // any 2 decoders
5877 5805 ALU : S3(2); // both alus
5878 5806 %}
5879 5807
5880 5808 // Integer ALU reg operation using big decoder
5881 5809 pipe_class ialu_reg_fat(eRegI dst) %{
5882 5810 single_instruction;
5883 5811 dst : S4(write);
5884 5812 dst : S3(read);
5885 5813 D0 : S0; // big decoder only
5886 5814 ALU : S3; // any alu
5887 5815 %}
5888 5816
5889 5817 // Long ALU reg operation using big decoder
5890 5818 pipe_class ialu_reg_long_fat(eRegL dst) %{
5891 5819 instruction_count(2);
5892 5820 dst : S4(write);
5893 5821 dst : S3(read);
5894 5822 D0 : S0(2); // big decoder only; twice
5895 5823 ALU : S3(2); // any 2 alus
5896 5824 %}
5897 5825
5898 5826 // Integer ALU reg-reg operation
5899 5827 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5900 5828 single_instruction;
5901 5829 dst : S4(write);
5902 5830 src : S3(read);
5903 5831 DECODE : S0; // any decoder
5904 5832 ALU : S3; // any alu
5905 5833 %}
5906 5834
5907 5835 // Long ALU reg-reg operation
5908 5836 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5909 5837 instruction_count(2);
5910 5838 dst : S4(write);
5911 5839 src : S3(read);
5912 5840 DECODE : S0(2); // any 2 decoders
5913 5841 ALU : S3(2); // both alus
5914 5842 %}
5915 5843
5916 5844 // Integer ALU reg-reg operation
5917 5845 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5918 5846 single_instruction;
5919 5847 dst : S4(write);
5920 5848 src : S3(read);
5921 5849 D0 : S0; // big decoder only
5922 5850 ALU : S3; // any alu
5923 5851 %}
5924 5852
5925 5853 // Long ALU reg-reg operation
5926 5854 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5927 5855 instruction_count(2);
5928 5856 dst : S4(write);
5929 5857 src : S3(read);
5930 5858 D0 : S0(2); // big decoder only; twice
5931 5859 ALU : S3(2); // both alus
5932 5860 %}
5933 5861
5934 5862 // Integer ALU reg-mem operation
5935 5863 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5936 5864 single_instruction;
5937 5865 dst : S5(write);
5938 5866 mem : S3(read);
5939 5867 D0 : S0; // big decoder only
5940 5868 ALU : S4; // any alu
5941 5869 MEM : S3; // any mem
5942 5870 %}
5943 5871
5944 5872 // Long ALU reg-mem operation
5945 5873 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5946 5874 instruction_count(2);
5947 5875 dst : S5(write);
5948 5876 mem : S3(read);
5949 5877 D0 : S0(2); // big decoder only; twice
5950 5878 ALU : S4(2); // any 2 alus
5951 5879 MEM : S3(2); // both mems
5952 5880 %}
5953 5881
5954 5882 // Integer mem operation (prefetch)
5955 5883 pipe_class ialu_mem(memory mem)
5956 5884 %{
5957 5885 single_instruction;
5958 5886 mem : S3(read);
5959 5887 D0 : S0; // big decoder only
5960 5888 MEM : S3; // any mem
5961 5889 %}
5962 5890
5963 5891 // Integer Store to Memory
5964 5892 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5965 5893 single_instruction;
5966 5894 mem : S3(read);
5967 5895 src : S5(read);
5968 5896 D0 : S0; // big decoder only
5969 5897 ALU : S4; // any alu
5970 5898 MEM : S3;
5971 5899 %}
5972 5900
5973 5901 // Long Store to Memory
5974 5902 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5975 5903 instruction_count(2);
5976 5904 mem : S3(read);
5977 5905 src : S5(read);
5978 5906 D0 : S0(2); // big decoder only; twice
5979 5907 ALU : S4(2); // any 2 alus
5980 5908 MEM : S3(2); // Both mems
5981 5909 %}
5982 5910
5983 5911 // Integer Store to Memory
5984 5912 pipe_class ialu_mem_imm(memory mem) %{
5985 5913 single_instruction;
5986 5914 mem : S3(read);
5987 5915 D0 : S0; // big decoder only
5988 5916 ALU : S4; // any alu
5989 5917 MEM : S3;
5990 5918 %}
5991 5919
5992 5920 // Integer ALU0 reg-reg operation
5993 5921 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5994 5922 single_instruction;
5995 5923 dst : S4(write);
5996 5924 src : S3(read);
5997 5925 D0 : S0; // Big decoder only
5998 5926 ALU0 : S3; // only alu0
5999 5927 %}
6000 5928
6001 5929 // Integer ALU0 reg-mem operation
6002 5930 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
6003 5931 single_instruction;
6004 5932 dst : S5(write);
6005 5933 mem : S3(read);
6006 5934 D0 : S0; // big decoder only
6007 5935 ALU0 : S4; // ALU0 only
6008 5936 MEM : S3; // any mem
6009 5937 %}
6010 5938
6011 5939 // Integer ALU reg-reg operation
6012 5940 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
6013 5941 single_instruction;
6014 5942 cr : S4(write);
6015 5943 src1 : S3(read);
6016 5944 src2 : S3(read);
6017 5945 DECODE : S0; // any decoder
6018 5946 ALU : S3; // any alu
6019 5947 %}
6020 5948
6021 5949 // Integer ALU reg-imm operation
6022 5950 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
6023 5951 single_instruction;
6024 5952 cr : S4(write);
6025 5953 src1 : S3(read);
6026 5954 DECODE : S0; // any decoder
6027 5955 ALU : S3; // any alu
6028 5956 %}
6029 5957
6030 5958 // Integer ALU reg-mem operation
6031 5959 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
6032 5960 single_instruction;
6033 5961 cr : S4(write);
6034 5962 src1 : S3(read);
6035 5963 src2 : S3(read);
6036 5964 D0 : S0; // big decoder only
6037 5965 ALU : S4; // any alu
6038 5966 MEM : S3;
6039 5967 %}
6040 5968
6041 5969 // Conditional move reg-reg
6042 5970 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
6043 5971 instruction_count(4);
6044 5972 y : S4(read);
6045 5973 q : S3(read);
6046 5974 p : S3(read);
6047 5975 DECODE : S0(4); // any decoder
6048 5976 %}
6049 5977
6050 5978 // Conditional move reg-reg
6051 5979 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
6052 5980 single_instruction;
6053 5981 dst : S4(write);
6054 5982 src : S3(read);
6055 5983 cr : S3(read);
6056 5984 DECODE : S0; // any decoder
6057 5985 %}
6058 5986
6059 5987 // Conditional move reg-mem
6060 5988 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
6061 5989 single_instruction;
6062 5990 dst : S4(write);
6063 5991 src : S3(read);
6064 5992 cr : S3(read);
6065 5993 DECODE : S0; // any decoder
6066 5994 MEM : S3;
6067 5995 %}
6068 5996
6069 5997 // Conditional move reg-reg long
6070 5998 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6071 5999 single_instruction;
6072 6000 dst : S4(write);
6073 6001 src : S3(read);
6074 6002 cr : S3(read);
6075 6003 DECODE : S0(2); // any 2 decoders
6076 6004 %}
6077 6005
6078 6006 // Conditional move double reg-reg
6079 6007 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6080 6008 single_instruction;
6081 6009 dst : S4(write);
6082 6010 src : S3(read);
6083 6011 cr : S3(read);
6084 6012 DECODE : S0; // any decoder
6085 6013 %}
6086 6014
6087 6015 // Float reg-reg operation
6088 6016 pipe_class fpu_reg(regD dst) %{
6089 6017 instruction_count(2);
6090 6018 dst : S3(read);
6091 6019 DECODE : S0(2); // any 2 decoders
6092 6020 FPU : S3;
6093 6021 %}
6094 6022
6095 6023 // Float reg-reg operation
6096 6024 pipe_class fpu_reg_reg(regD dst, regD src) %{
6097 6025 instruction_count(2);
6098 6026 dst : S4(write);
6099 6027 src : S3(read);
6100 6028 DECODE : S0(2); // any 2 decoders
6101 6029 FPU : S3;
6102 6030 %}
6103 6031
6104 6032 // Float reg-reg operation
6105 6033 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6106 6034 instruction_count(3);
6107 6035 dst : S4(write);
6108 6036 src1 : S3(read);
6109 6037 src2 : S3(read);
6110 6038 DECODE : S0(3); // any 3 decoders
6111 6039 FPU : S3(2);
6112 6040 %}
6113 6041
6114 6042 // Float reg-reg operation
6115 6043 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6116 6044 instruction_count(4);
6117 6045 dst : S4(write);
6118 6046 src1 : S3(read);
6119 6047 src2 : S3(read);
6120 6048 src3 : S3(read);
6121 6049 DECODE : S0(4); // any 4 decoders
6122 6050 FPU : S3(2);
6123 6051 %}
6124 6052
6125 6053 // Float reg-reg operation
6126 6054 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6127 6055 instruction_count(4);
6128 6056 dst : S4(write);
6129 6057 src1 : S3(read);
6130 6058 src2 : S3(read);
6131 6059 src3 : S3(read);
6132 6060 DECODE : S1(3); // any 3 decoders
6133 6061 D0 : S0; // Big decoder only
6134 6062 FPU : S3(2);
6135 6063 MEM : S3;
6136 6064 %}
6137 6065
6138 6066 // Float reg-mem operation
6139 6067 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6140 6068 instruction_count(2);
6141 6069 dst : S5(write);
6142 6070 mem : S3(read);
6143 6071 D0 : S0; // big decoder only
6144 6072 DECODE : S1; // any decoder for FPU POP
6145 6073 FPU : S4;
6146 6074 MEM : S3; // any mem
6147 6075 %}
6148 6076
6149 6077 // Float reg-mem operation
6150 6078 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6151 6079 instruction_count(3);
6152 6080 dst : S5(write);
6153 6081 src1 : S3(read);
6154 6082 mem : S3(read);
6155 6083 D0 : S0; // big decoder only
6156 6084 DECODE : S1(2); // any decoder for FPU POP
6157 6085 FPU : S4;
6158 6086 MEM : S3; // any mem
6159 6087 %}
6160 6088
6161 6089 // Float mem-reg operation
6162 6090 pipe_class fpu_mem_reg(memory mem, regD src) %{
6163 6091 instruction_count(2);
6164 6092 src : S5(read);
6165 6093 mem : S3(read);
6166 6094 DECODE : S0; // any decoder for FPU PUSH
6167 6095 D0 : S1; // big decoder only
6168 6096 FPU : S4;
6169 6097 MEM : S3; // any mem
6170 6098 %}
6171 6099
6172 6100 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6173 6101 instruction_count(3);
6174 6102 src1 : S3(read);
6175 6103 src2 : S3(read);
6176 6104 mem : S3(read);
6177 6105 DECODE : S0(2); // any decoder for FPU PUSH
6178 6106 D0 : S1; // big decoder only
6179 6107 FPU : S4;
6180 6108 MEM : S3; // any mem
6181 6109 %}
6182 6110
6183 6111 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6184 6112 instruction_count(3);
6185 6113 src1 : S3(read);
6186 6114 src2 : S3(read);
6187 6115 mem : S4(read);
6188 6116 DECODE : S0; // any decoder for FPU PUSH
6189 6117 D0 : S0(2); // big decoder only
6190 6118 FPU : S4;
6191 6119 MEM : S3(2); // any mem
6192 6120 %}
6193 6121
6194 6122 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6195 6123 instruction_count(2);
6196 6124 src1 : S3(read);
6197 6125 dst : S4(read);
6198 6126 D0 : S0(2); // big decoder only
6199 6127 MEM : S3(2); // any mem
6200 6128 %}
6201 6129
6202 6130 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6203 6131 instruction_count(3);
6204 6132 src1 : S3(read);
6205 6133 src2 : S3(read);
6206 6134 dst : S4(read);
6207 6135 D0 : S0(3); // big decoder only
6208 6136 FPU : S4;
6209 6137 MEM : S3(3); // any mem
6210 6138 %}
6211 6139
6212 6140 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6213 6141 instruction_count(3);
6214 6142 src1 : S4(read);
6215 6143 mem : S4(read);
6216 6144 DECODE : S0; // any decoder for FPU PUSH
6217 6145 D0 : S0(2); // big decoder only
6218 6146 FPU : S4;
6219 6147 MEM : S3(2); // any mem
6220 6148 %}
6221 6149
6222 6150 // Float load constant
6223 6151 pipe_class fpu_reg_con(regD dst) %{
6224 6152 instruction_count(2);
6225 6153 dst : S5(write);
6226 6154 D0 : S0; // big decoder only for the load
6227 6155 DECODE : S1; // any decoder for FPU POP
6228 6156 FPU : S4;
6229 6157 MEM : S3; // any mem
6230 6158 %}
6231 6159
6232 6160 // Float load constant
6233 6161 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6234 6162 instruction_count(3);
6235 6163 dst : S5(write);
6236 6164 src : S3(read);
6237 6165 D0 : S0; // big decoder only for the load
6238 6166 DECODE : S1(2); // any decoder for FPU POP
6239 6167 FPU : S4;
6240 6168 MEM : S3; // any mem
6241 6169 %}
6242 6170
6243 6171 // UnConditional branch
6244 6172 pipe_class pipe_jmp( label labl ) %{
6245 6173 single_instruction;
6246 6174 BR : S3;
6247 6175 %}
6248 6176
6249 6177 // Conditional branch
6250 6178 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6251 6179 single_instruction;
6252 6180 cr : S1(read);
6253 6181 BR : S3;
6254 6182 %}
6255 6183
6256 6184 // Allocation idiom
6257 6185 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6258 6186 instruction_count(1); force_serialization;
6259 6187 fixed_latency(6);
6260 6188 heap_ptr : S3(read);
6261 6189 DECODE : S0(3);
6262 6190 D0 : S2;
6263 6191 MEM : S3;
6264 6192 ALU : S3(2);
6265 6193 dst : S5(write);
6266 6194 BR : S5;
6267 6195 %}
6268 6196
6269 6197 // Generic big/slow expanded idiom
6270 6198 pipe_class pipe_slow( ) %{
6271 6199 instruction_count(10); multiple_bundles; force_serialization;
6272 6200 fixed_latency(100);
6273 6201 D0 : S0(2);
6274 6202 MEM : S3(2);
6275 6203 %}
6276 6204
6277 6205 // The real do-nothing guy
6278 6206 pipe_class empty( ) %{
6279 6207 instruction_count(0);
6280 6208 %}
6281 6209
6282 6210 // Define the class for the Nop node
6283 6211 define %{
6284 6212 MachNop = empty;
6285 6213 %}
6286 6214
6287 6215 %}
6288 6216
6289 6217 //----------INSTRUCTIONS-------------------------------------------------------
6290 6218 //
6291 6219 // match -- States which machine-independent subtree may be replaced
6292 6220 // by this instruction.
6293 6221 // ins_cost -- The estimated cost of this instruction is used by instruction
6294 6222 // selection to identify a minimum cost tree of machine
6295 6223 // instructions that matches a tree of machine-independent
6296 6224 // instructions.
6297 6225 // format -- A string providing the disassembly for this instruction.
6298 6226 // The value of an instruction's operand may be inserted
6299 6227 // by referring to it with a '$' prefix.
6300 6228 // opcode -- Three instruction opcodes may be provided. These are referred
6301 6229 // to within an encode class as $primary, $secondary, and $tertiary
6302 6230 // respectively. The primary opcode is commonly used to
6303 6231 // indicate the type of machine instruction, while secondary
6304 6232 // and tertiary are often used for prefix options or addressing
6305 6233 // modes.
6306 6234 // ins_encode -- A list of encode classes with parameters. The encode class
6307 6235 // name must have been defined in an 'enc_class' specification
6308 6236 // in the encode section of the architecture description.
6309 6237
6310 6238 //----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit int in place with BSWAP (0F C8+reg).
6311 6239 instruct bytes_reverse_int(eRegI dst) %{
6312 6240 match(Set dst (ReverseBytesI dst));
6313 6241
6314 6242 format %{ "BSWAP $dst" %}
6315 6243 opcode(0x0F, 0xC8);
6316 6244 ins_encode( OpcP, OpcSReg(dst) );
6317 6245 ins_pipe( ialu_reg );
6318 6246 %}
6319 6247
// Reverse the byte order of a long: BSWAP each 32-bit half, then exchange
// the halves so the low/high words trade places.
6320 6248 instruct bytes_reverse_long(eRegL dst) %{
6321 6249 match(Set dst (ReverseBytesL dst));
6322 6250
6323 6251 format %{ "BSWAP $dst.lo\n\t"
6324 6252 "BSWAP $dst.hi\n\t"
6325 6253 "XCHG $dst.lo $dst.hi" %}
6326 6254
6327 6255 ins_cost(125);
6328 6256 ins_encode( bswap_long_bytes(dst) );
6329 6257 ins_pipe( ialu_reg_reg);
6330 6258 %}
6331 6259
// Reverse the two low bytes: BSWAP moves them to the top half, then a
// logical shift right by 16 brings them back down zero-extended.
6332 6260 instruct bytes_reverse_unsigned_short(eRegI dst) %{
6333 6261 match(Set dst (ReverseBytesUS dst));
6334 6262
6335 6263 format %{ "BSWAP $dst\n\t"
6336 6264 "SHR $dst,16\n\t" %}
6337 6265 ins_encode %{
6338 6266 __ bswapl($dst$$Register);
6339 6267 __ shrl($dst$$Register, 16);
6340 6268 %}
6341 6269 ins_pipe( ialu_reg );
6342 6270 %}
6343 6271
// Same idea as the unsigned variant above, but the arithmetic shift (SAR)
// sign-extends the reversed 16-bit value instead of zero-extending it.
6344 6272 instruct bytes_reverse_short(eRegI dst) %{
6345 6273 match(Set dst (ReverseBytesS dst));
6346 6274
6347 6275 format %{ "BSWAP $dst\n\t"
6348 6276 "SAR $dst,16\n\t" %}
6349 6277 ins_encode %{
6350 6278 __ bswapl($dst$$Register);
6351 6279 __ sarl($dst$$Register, 16);
6352 6280 %}
6353 6281 ins_pipe( ialu_reg );
6354 6282 %}
6355 6283
6356 6284
6357 6285 //---------- Zeros Count Instructions ------------------------------------------
6358 6286
// Count leading zeros with a single LZCNT; selected only when the CPU
// supports it (UseCountLeadingZerosInstruction predicate).
6359 6287 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6360 6288 predicate(UseCountLeadingZerosInstruction);
6361 6289 match(Set dst (CountLeadingZerosI src));
6362 6290 effect(KILL cr);
6363 6291
6364 6292 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6365 6293 ins_encode %{
6366 6294 __ lzcntl($dst$$Register, $src$$Register);
6367 6295 %}
6368 6296 ins_pipe(ialu_reg);
6369 6297 %}
6370 6298
// Fallback when LZCNT is unavailable: computes 31 - BSR(src).
// BSR sets ZF and leaves dst undefined for src == 0, so that case is
// patched to -1, which the final NEG/ADD sequence turns into 32.
6371 6299 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6372 6300 predicate(!UseCountLeadingZerosInstruction);
6373 6301 match(Set dst (CountLeadingZerosI src));
6374 6302 effect(KILL cr);
6375 6303
6376 6304 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6377 6305 "JNZ skip\n\t"
6378 6306 "MOV $dst, -1\n"
6379 6307 "skip:\n\t"
6380 6308 "NEG $dst\n\t"
6381 6309 "ADD $dst, 31" %}
6382 6310 ins_encode %{
6383 6311 Register Rdst = $dst$$Register;
6384 6312 Register Rsrc = $src$$Register;
6385 6313 Label skip;
6386 6314 __ bsrl(Rdst, Rsrc);
6387 6315 __ jccb(Assembler::notZero, skip);
6388 6316 __ movl(Rdst, -1);
6389 6317 __ bind(skip);
6390 6318 __ negl(Rdst);
6391 6319 __ addl(Rdst, BitsPerInt - 1);
6392 6320 %}
6393 6321 ins_pipe(ialu_reg);
6394 6322 %}
6395 6323
// LZCNT sets the carry flag when its source is all zeros, so JNC (carryClear)
// means the high word supplied the count; otherwise count the low word and
// add 32 for the zero high word.
6396 6324 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6397 6325 predicate(UseCountLeadingZerosInstruction);
6398 6326 match(Set dst (CountLeadingZerosL src));
6399 6327 effect(TEMP dst, KILL cr);
6400 6328
6401 6329 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6402 6330 "JNC done\n\t"
6403 6331 "LZCNT $dst, $src.lo\n\t"
6404 6332 "ADD $dst, 32\n"
6405 6333 "done:" %}
6406 6334 ins_encode %{
6407 6335 Register Rdst = $dst$$Register;
6408 6336 Register Rsrc = $src$$Register;
6409 6337 Label done;
6410 6338 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6411 6339 __ jccb(Assembler::carryClear, done);
6412 6340 __ lzcntl(Rdst, Rsrc);
6413 6341 __ addl(Rdst, BitsPerInt);
6414 6342 __ bind(done);
6415 6343 %}
6416 6344 ins_pipe(ialu_reg);
6417 6345 %}
6418 6346
// BSR fallback for longs: computes 63 - (index of highest set bit).
// The high word's bit index is biased by 32 before the NEG/ADD 63 finish;
// a fully-zero source falls through both BSR tests to the -1 path,
// yielding 64.
6419 6347 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6420 6348 predicate(!UseCountLeadingZerosInstruction);
6421 6349 match(Set dst (CountLeadingZerosL src));
6422 6350 effect(TEMP dst, KILL cr);
6423 6351
6424 6352 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6425 6353 "JZ msw_is_zero\n\t"
6426 6354 "ADD $dst, 32\n\t"
6427 6355 "JMP not_zero\n"
6428 6356 "msw_is_zero:\n\t"
6429 6357 "BSR $dst, $src.lo\n\t"
6430 6358 "JNZ not_zero\n\t"
6431 6359 "MOV $dst, -1\n"
6432 6360 "not_zero:\n\t"
6433 6361 "NEG $dst\n\t"
6434 6362 "ADD $dst, 63\n" %}
6435 6363 ins_encode %{
6436 6364 Register Rdst = $dst$$Register;
6437 6365 Register Rsrc = $src$$Register;
6438 6366 Label msw_is_zero;
6439 6367 Label not_zero;
6440 6368 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6441 6369 __ jccb(Assembler::zero, msw_is_zero);
6442 6370 __ addl(Rdst, BitsPerInt);
6443 6371 __ jmpb(not_zero);
6444 6372 __ bind(msw_is_zero);
6445 6373 __ bsrl(Rdst, Rsrc);
6446 6374 __ jccb(Assembler::notZero, not_zero);
6447 6375 __ movl(Rdst, -1);
6448 6376 __ bind(not_zero);
6449 6377 __ negl(Rdst);
6450 6378 __ addl(Rdst, BitsPerLong - 1);
6451 6379 %}
6452 6380 ins_pipe(ialu_reg);
6453 6381 %}
6454 6382
// BSF finds the lowest set bit, which is directly the trailing-zero count;
// like BSR it leaves dst undefined (ZF set) for a zero source, hence the
// explicit MOV dst,32 for that case.
6455 6383 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6456 6384 match(Set dst (CountTrailingZerosI src));
6457 6385 effect(KILL cr);
6458 6386
6459 6387 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6460 6388 "JNZ done\n\t"
6461 6389 "MOV $dst, 32\n"
6462 6390 "done:" %}
6463 6391 ins_encode %{
6464 6392 Register Rdst = $dst$$Register;
6465 6393 Label done;
6466 6394 __ bsfl(Rdst, $src$$Register);
6467 6395 __ jccb(Assembler::notZero, done);
6468 6396 __ movl(Rdst, BitsPerInt);
6469 6397 __ bind(done);
6470 6398 %}
6471 6399 ins_pipe(ialu_reg);
6472 6400 %}
6473 6401
// Trailing zeros of a 64-bit value on 32-bit x86: scan the low word first;
// if it is zero, scan the high word and add 32.  If both are zero the
// MOV dst,32 followed by the shared ADD dst,32 yields 64.
6474 6402 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6475 6403 match(Set dst (CountTrailingZerosL src));
6476 6404 effect(TEMP dst, KILL cr);
6477 6405
6478 6406 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6479 6407 "JNZ done\n\t"
6480 6408 "BSF $dst, $src.hi\n\t"
6481 6409 "JNZ msw_not_zero\n\t"
6482 6410 "MOV $dst, 32\n"
6483 6411 "msw_not_zero:\n\t"
6484 6412 "ADD $dst, 32\n"
6485 6413 "done:" %}
6486 6414 ins_encode %{
6487 6415 Register Rdst = $dst$$Register;
6488 6416 Register Rsrc = $src$$Register;
6489 6417 Label msw_not_zero;
6490 6418 Label done;
6491 6419 __ bsfl(Rdst, Rsrc);
6492 6420 __ jccb(Assembler::notZero, done);
6493 6421 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6494 6422 __ jccb(Assembler::notZero, msw_not_zero);
6495 6423 __ movl(Rdst, BitsPerInt);
6496 6424 __ bind(msw_not_zero);
6497 6425 __ addl(Rdst, BitsPerInt);
6498 6426 __ bind(done);
6499 6427 %}
6500 6428 ins_pipe(ialu_reg);
6501 6429 %}
6502 6430
6503 6431
6504 6432 //---------- Population Count Instructions -------------------------------------
6505 6433
// All four forms are guarded by UsePopCountInstruction (POPCNT is SSE4.2+).
// The long variants sum the popcounts of the two 32-bit halves; for the
// memory form the halves are addressed at disp and disp+4 via raw addresses.
6506 6434 instruct popCountI(eRegI dst, eRegI src) %{
6507 6435 predicate(UsePopCountInstruction);
6508 6436 match(Set dst (PopCountI src));
6509 6437
6510 6438 format %{ "POPCNT $dst, $src" %}
6511 6439 ins_encode %{
6512 6440 __ popcntl($dst$$Register, $src$$Register);
6513 6441 %}
6514 6442 ins_pipe(ialu_reg);
6515 6443 %}
6516 6444
6517 6445 instruct popCountI_mem(eRegI dst, memory mem) %{
6518 6446 predicate(UsePopCountInstruction);
6519 6447 match(Set dst (PopCountI (LoadI mem)));
6520 6448
6521 6449 format %{ "POPCNT $dst, $mem" %}
6522 6450 ins_encode %{
6523 6451 __ popcntl($dst$$Register, $mem$$Address);
6524 6452 %}
6525 6453 ins_pipe(ialu_reg);
6526 6454 %}
6527 6455
6528 6456 // Note: Long.bitCount(long) returns an int.
6529 6457 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6530 6458 predicate(UsePopCountInstruction);
6531 6459 match(Set dst (PopCountL src));
6532 6460 effect(KILL cr, TEMP tmp, TEMP dst);
6533 6461
6534 6462 format %{ "POPCNT $dst, $src.lo\n\t"
6535 6463 "POPCNT $tmp, $src.hi\n\t"
6536 6464 "ADD $dst, $tmp" %}
6537 6465 ins_encode %{
6538 6466 __ popcntl($dst$$Register, $src$$Register);
6539 6467 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6540 6468 __ addl($dst$$Register, $tmp$$Register);
6541 6469 %}
6542 6470 ins_pipe(ialu_reg);
6543 6471 %}
6544 6472
6545 6473 // Note: Long.bitCount(long) returns an int.
6546 6474 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6547 6475 predicate(UsePopCountInstruction);
6548 6476 match(Set dst (PopCountL (LoadL mem)));
6549 6477 effect(KILL cr, TEMP tmp, TEMP dst);
6550 6478
6551 6479 format %{ "POPCNT $dst, $mem\n\t"
6552 6480 "POPCNT $tmp, $mem+4\n\t"
6553 6481 "ADD $dst, $tmp" %}
6554 6482 ins_encode %{
6555 6483 //__ popcntl($dst$$Register, $mem$$Address$$first);
6556 6484 //__ popcntl($tmp$$Register, $mem$$Address$$second);
6557 6485 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6558 6486 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6559 6487 __ addl($dst$$Register, $tmp$$Register);
6560 6488 %}
6561 6489 ins_pipe(ialu_reg);
6562 6490 %}
6563 6491
6564 6492
6565 6493 //----------Load/Store/Move Instructions---------------------------------------
6566 6494 //----------Load Instructions--------------------------------------------------
6567 6495 // Load Byte (8bit signed)
6568 6496 instruct loadB(xRegI dst, memory mem) %{
6569 6497 match(Set dst (LoadB mem));
6570 6498
6571 6499 ins_cost(125);
6572 6500 format %{ "MOVSX8 $dst,$mem\t# byte" %}
6573 6501
6574 6502 ins_encode %{
6575 6503 __ movsbl($dst$$Register, $mem$$Address);
6576 6504 %}
6577 6505
6578 6506 ins_pipe(ialu_reg_mem);
6579 6507 %}
6580 6508
6581 6509 // Load Byte (8bit signed) into Long Register
// Sign-extend into the 64-bit pair: copy the sign-extended low word into
// the high register, then arithmetic-shift to replicate the sign bit.
6582 6510 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6583 6511 match(Set dst (ConvI2L (LoadB mem)));
6584 6512 effect(KILL cr);
6585 6513
6586 6514 ins_cost(375);
6587 6515 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6588 6516 "MOV $dst.hi,$dst.lo\n\t"
6589 6517 "SAR $dst.hi,7" %}
6590 6518
6591 6519 ins_encode %{
6592 6520 __ movsbl($dst$$Register, $mem$$Address);
6593 6521 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6594 6522 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6595 6523 %}
6596 6524
6597 6525 ins_pipe(ialu_reg_mem);
6598 6526 %}
6599 6527
6600 6528 // Load Unsigned Byte (8bit UNsigned)
6601 6529 instruct loadUB(xRegI dst, memory mem) %{
6602 6530 match(Set dst (LoadUB mem));
6603 6531
6604 6532 ins_cost(125);
6605 6533 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6606 6534
6607 6535 ins_encode %{
6608 6536 __ movzbl($dst$$Register, $mem$$Address);
6609 6537 %}
6610 6538
6611 6539 ins_pipe(ialu_reg_mem);
6612 6540 %}
6613 6541
6614 6542 // Load Unsigned Byte (8 bit UNsigned) into Long Register
// Unsigned widening: zero the high register instead of sign-propagating.
6615 6543 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6616 6544 match(Set dst (ConvI2L (LoadUB mem)));
6617 6545 effect(KILL cr);
6618 6546
6619 6547 ins_cost(250);
6620 6548 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6621 6549 "XOR $dst.hi,$dst.hi" %}
6622 6550
6623 6551 ins_encode %{
6624 6552 Register Rdst = $dst$$Register;
6625 6553 __ movzbl(Rdst, $mem$$Address);
6626 6554 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6627 6555 %}
6628 6556
6629 6557 ins_pipe(ialu_reg_mem);
6630 6558 %}
6631 6559
6632 6560 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
6633 6561 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6634 6562 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6635 6563 effect(KILL cr);
6636 6564
6637 6565 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6638 6566 "XOR $dst.hi,$dst.hi\n\t"
6639 6567 "AND $dst.lo,$mask" %}
6640 6568 ins_encode %{
6641 6569 Register Rdst = $dst$$Register;
6642 6570 __ movzbl(Rdst, $mem$$Address);
6643 6571 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6644 6572 __ andl(Rdst, $mask$$constant);
6645 6573 %}
6646 6574 ins_pipe(ialu_reg_mem);
6647 6575 %}
6648 6576
6649 6577 // Load Short (16bit signed)
6650 6578 instruct loadS(eRegI dst, memory mem) %{
6651 6579 match(Set dst (LoadS mem));
6652 6580
6653 6581 ins_cost(125);
6654 6582 format %{ "MOVSX $dst,$mem\t# short" %}
6655 6583
6656 6584 ins_encode %{
6657 6585 __ movswl($dst$$Register, $mem$$Address);
6658 6586 %}
6659 6587
6660 6588 ins_pipe(ialu_reg_mem);
6661 6589 %}
6662 6590
6663 6591 // Load Short (16 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 pattern is byte sign-extension; fold it into one MOVSX8.
6664 6592 instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6665 6593 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6666 6594
6667 6595 ins_cost(125);
6668 6596 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
6669 6597 ins_encode %{
6670 6598 __ movsbl($dst$$Register, $mem$$Address);
6671 6599 %}
6672 6600 ins_pipe(ialu_reg_mem);
6673 6601 %}
6674 6602
6675 6603 // Load Short (16bit signed) into Long Register
6676 6604 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6677 6605 match(Set dst (ConvI2L (LoadS mem)));
6678 6606 effect(KILL cr);
6679 6607
6680 6608 ins_cost(375);
6681 6609 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
6682 6610 "MOV $dst.hi,$dst.lo\n\t"
6683 6611 "SAR $dst.hi,15" %}
6684 6612
6685 6613 ins_encode %{
6686 6614 __ movswl($dst$$Register, $mem$$Address);
6687 6615 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6688 6616 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
6689 6617 %}
6690 6618
6691 6619 ins_pipe(ialu_reg_mem);
6692 6620 %}
6693 6621
6694 6622 // Load Unsigned Short/Char (16bit unsigned)
6695 6623 instruct loadUS(eRegI dst, memory mem) %{
6696 6624 match(Set dst (LoadUS mem));
6697 6625
6698 6626 ins_cost(125);
6699 6627 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
6700 6628
6701 6629 ins_encode %{
6702 6630 __ movzwl($dst$$Register, $mem$$Address);
6703 6631 %}
6704 6632
6705 6633 ins_pipe(ialu_reg_mem);
6706 6634 %}
6707 6635
6708 6636 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6709 6637 instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6710 6638 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6711 6639
6712 6640 ins_cost(125);
6713 6641 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
6714 6642 ins_encode %{
6715 6643 __ movsbl($dst$$Register, $mem$$Address);
6716 6644 %}
6717 6645 ins_pipe(ialu_reg_mem);
6718 6646 %}
6719 6647
6720 6648 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6721 6649 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6722 6650 match(Set dst (ConvI2L (LoadUS mem)));
6723 6651 effect(KILL cr);
6724 6652
6725 6653 ins_cost(250);
6726 6654 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
6727 6655 "XOR $dst.hi,$dst.hi" %}
6728 6656
6729 6657 ins_encode %{
6730 6658 __ movzwl($dst$$Register, $mem$$Address);
6731 6659 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6732 6660 %}
6733 6661
6734 6662 ins_pipe(ialu_reg_mem);
6735 6663 %}
6736 6664
6737 6665 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Mask 0xFF makes the upper byte irrelevant, so a single byte zero-extend
// (movzbl) replaces the load+and sequence.
6738 6666 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6739 6667 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6740 6668 effect(KILL cr);
6741 6669
6742 6670 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6743 6671 "XOR $dst.hi,$dst.hi" %}
6744 6672 ins_encode %{
6745 6673 Register Rdst = $dst$$Register;
6746 6674 __ movzbl(Rdst, $mem$$Address);
6747 6675 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6748 6676 %}
6749 6677 ins_pipe(ialu_reg_mem);
6750 6678 %}
6751 6679
6752 6680 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
6753 6681 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6754 6682 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6755 6683 effect(KILL cr);
6756 6684
6757 6685 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6758 6686 "XOR $dst.hi,$dst.hi\n\t"
6759 6687 "AND $dst.lo,$mask" %}
6760 6688 ins_encode %{
6761 6689 Register Rdst = $dst$$Register;
6762 6690 __ movzwl(Rdst, $mem$$Address);
6763 6691 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6764 6692 __ andl(Rdst, $mask$$constant);
6765 6693 %}
6766 6694 ins_pipe(ialu_reg_mem);
6767 6695 %}
6768 6696
6769 6697 // Load Integer
6770 6698 instruct loadI(eRegI dst, memory mem) %{
6771 6699 match(Set dst (LoadI mem));
6772 6700
6773 6701 ins_cost(125);
6774 6702 format %{ "MOV $dst,$mem\t# int" %}
6775 6703
6776 6704 ins_encode %{
6777 6705 __ movl($dst$$Register, $mem$$Address);
6778 6706 %}
6779 6707
6780 6708 ins_pipe(ialu_reg_mem);
6781 6709 %}
6782 6710
6783 6711 // Load Integer (32 bit signed) to Byte (8 bit signed)
// The following narrow-load rules fold shift-pair / mask idioms that the
// parser emits for (byte)/(short)/(char) casts into a single MOVSX/MOVZX.
6784 6712 instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6785 6713 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6786 6714
6787 6715 ins_cost(125);
6788 6716 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
6789 6717 ins_encode %{
6790 6718 __ movsbl($dst$$Register, $mem$$Address);
6791 6719 %}
6792 6720 ins_pipe(ialu_reg_mem);
6793 6721 %}
6794 6722
6795 6723 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6796 6724 instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6797 6725 match(Set dst (AndI (LoadI mem) mask));
6798 6726
6799 6727 ins_cost(125);
6800 6728 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
6801 6729 ins_encode %{
6802 6730 __ movzbl($dst$$Register, $mem$$Address);
6803 6731 %}
6804 6732 ins_pipe(ialu_reg_mem);
6805 6733 %}
6806 6734
6807 6735 // Load Integer (32 bit signed) to Short (16 bit signed)
6808 6736 instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6809 6737 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6810 6738
6811 6739 ins_cost(125);
6812 6740 format %{ "MOVSX $dst, $mem\t# int -> short" %}
6813 6741 ins_encode %{
6814 6742 __ movswl($dst$$Register, $mem$$Address);
6815 6743 %}
6816 6744 ins_pipe(ialu_reg_mem);
6817 6745 %}
6818 6746
6819 6747 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6820 6748 instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6821 6749 match(Set dst (AndI (LoadI mem) mask));
6822 6750
6823 6751 ins_cost(125);
6824 6752 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
6825 6753 ins_encode %{
6826 6754 __ movzwl($dst$$Register, $mem$$Address);
6827 6755 %}
6828 6756 ins_pipe(ialu_reg_mem);
6829 6757 %}
6830 6758
6831 6759 // Load Integer into Long Register
6832 6760 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6833 6761 match(Set dst (ConvI2L (LoadI mem)));
6834 6762 effect(KILL cr);
6835 6763
6836 6764 ins_cost(375);
6837 6765 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
6838 6766 "MOV $dst.hi,$dst.lo\n\t"
6839 6767 "SAR $dst.hi,31" %}
6840 6768
6841 6769 ins_encode %{
6842 6770 __ movl($dst$$Register, $mem$$Address);
6843 6771 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6844 6772 __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
6845 6773 %}
6846 6774
6847 6775 ins_pipe(ialu_reg_mem);
6848 6776 %}
6849 6777
6850 6778 // Load Integer with mask 0xFF into Long Register
6851 6779 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6852 6780 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6853 6781 effect(KILL cr);
6854 6782
6855 6783 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6856 6784 "XOR $dst.hi,$dst.hi" %}
6857 6785 ins_encode %{
6858 6786 Register Rdst = $dst$$Register;
6859 6787 __ movzbl(Rdst, $mem$$Address);
6860 6788 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6861 6789 %}
6862 6790 ins_pipe(ialu_reg_mem);
6863 6791 %}
6864 6792
6865 6793 // Load Integer with mask 0xFFFF into Long Register
6866 6794 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6867 6795 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6868 6796 effect(KILL cr);
6869 6797
6870 6798 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6871 6799 "XOR $dst.hi,$dst.hi" %}
6872 6800 ins_encode %{
6873 6801 Register Rdst = $dst$$Register;
6874 6802 __ movzwl(Rdst, $mem$$Address);
6875 6803 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6876 6804 %}
6877 6805 ins_pipe(ialu_reg_mem);
6878 6806 %}
6879 6807
6880 6808 // Load Integer with 32-bit mask into Long Register
6881 6809 instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6882 6810 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6883 6811 effect(KILL cr);
6884 6812
6885 6813 format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6886 6814 "XOR $dst.hi,$dst.hi\n\t"
6887 6815 "AND $dst.lo,$mask" %}
6888 6816 ins_encode %{
6889 6817 Register Rdst = $dst$$Register;
6890 6818 __ movl(Rdst, $mem$$Address);
6891 6819 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6892 6820 __ andl(Rdst, $mask$$constant);
6893 6821 %}
6894 6822 ins_pipe(ialu_reg_mem);
6895 6823 %}
6896 6824
6897 6825 // Load Unsigned Integer into Long Register
6898 6826 instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6899 6827 match(Set dst (LoadUI2L mem));
6900 6828 effect(KILL cr);
6901 6829
6902 6830 ins_cost(250);
6903 6831 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
6904 6832 "XOR $dst.hi,$dst.hi" %}
6905 6833
6906 6834 ins_encode %{
6907 6835 __ movl($dst$$Register, $mem$$Address);
6908 6836 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6909 6837 %}
6910 6838
6911 6839 ins_pipe(ialu_reg_mem);
6912 6840 %}
6913 6841
6914 6842 // Load Long. Cannot clobber address while loading, so restrict address
6915 6843 // register to ESI
// Non-atomic two-instruction load; used only when the LoadL node does not
// require atomic access (see predicate).
6916 6844 instruct loadL(eRegL dst, load_long_memory mem) %{
6917 6845 predicate(!((LoadLNode*)n)->require_atomic_access());
6918 6846 match(Set dst (LoadL mem));
6919 6847
6920 6848 ins_cost(250);
6921 6849 format %{ "MOV $dst.lo,$mem\t# long\n\t"
6922 6850 "MOV $dst.hi,$mem+4" %}
6923 6851
6924 6852 ins_encode %{
6925 6853 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
6926 6854 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
6927 6855 __ movl($dst$$Register, Amemlo);
6928 6856 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6929 6857 %}
6930 6858
6931 6859 ins_pipe(ialu_reg_long_mem);
6932 6860 %}
6933 6861
6934 6862 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6935 6863 // then store it down to the stack and reload on the int
6936 6864 // side.
// x87 path (UseSSE<=1): a single 64-bit FILD/FISTP pair is the only atomic
// 64-bit memory op available without SSE2.
6937 6865 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6938 6866 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6939 6867 match(Set dst (LoadL mem));
6940 6868
6941 6869 ins_cost(200);
6942 6870 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6943 6871 "FISTp $dst" %}
6944 6872 ins_encode(enc_loadL_volatile(mem,dst));
6945 6873 ins_pipe( fpu_reg_mem );
6946 6874 %}
6947 6875
// SSE2 path: MOVSD performs the atomic 64-bit load into an XMM temp.
6948 6876 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6949 6877 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6950 6878 match(Set dst (LoadL mem));
6951 6879 effect(TEMP tmp);
6952 6880 ins_cost(180);
6953 6881 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6954 6882 "MOVSD $dst,$tmp" %}
6955 6883 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
6956 6884 ins_pipe( pipe_slow );
6957 6885 %}
6958 6886
// SSE2 path landing in an integer register pair: MOVD low half, shift the
// XMM temp right 32 bits, MOVD high half.
6959 6887 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6960 6888 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6961 6889 match(Set dst (LoadL mem));
6962 6890 effect(TEMP tmp);
6963 6891 ins_cost(160);
6964 6892 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6965 6893 "MOVD $dst.lo,$tmp\n\t"
6966 6894 "PSRLQ $tmp,32\n\t"
6967 6895 "MOVD $dst.hi,$tmp" %}
6968 6896 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
6969 6897 ins_pipe( pipe_slow );
6970 6898 %}
6971 6899
6972 6900 // Load Range
// Range / pointer / klass loads are all plain 32-bit MOV (opcode 0x8B),
// still using the legacy opcode/ins_encode style rather than ins_encode %{ %}.
6973 6901 instruct loadRange(eRegI dst, memory mem) %{
6974 6902 match(Set dst (LoadRange mem));
6975 6903
6976 6904 ins_cost(125);
6977 6905 format %{ "MOV $dst,$mem" %}
6978 6906 opcode(0x8B);
6979 6907 ins_encode( OpcP, RegMem(dst,mem));
6980 6908 ins_pipe( ialu_reg_mem );
6981 6909 %}
6982 6910
6983 6911
6984 6912 // Load Pointer
6985 6913 instruct loadP(eRegP dst, memory mem) %{
6986 6914 match(Set dst (LoadP mem));
6987 6915
6988 6916 ins_cost(125);
6989 6917 format %{ "MOV $dst,$mem" %}
6990 6918 opcode(0x8B);
6991 6919 ins_encode( OpcP, RegMem(dst,mem));
6992 6920 ins_pipe( ialu_reg_mem );
6993 6921 %}
6994 6922
6995 6923 // Load Klass Pointer
6996 6924 instruct loadKlass(eRegP dst, memory mem) %{
6997 6925 match(Set dst (LoadKlass mem));
6998 6926
6999 6927 ins_cost(125);
7000 6928 format %{ "MOV $dst,$mem" %}
7001 6929 opcode(0x8B);
7002 6930 ins_encode( OpcP, RegMem(dst,mem));
7003 6931 ins_pipe( ialu_reg_mem );
7004 6932 %}
7005 6933
7006 6934 // Load Double
// x87 double load: FLD pushes onto the FPU stack, FSTP pops into dst.
7007 6935 instruct loadD(regD dst, memory mem) %{
7008 6936 predicate(UseSSE<=1);
7009 6937 match(Set dst (LoadD mem));
7010 6938
7011 6939 ins_cost(150);
7012 6940 format %{ "FLD_D ST,$mem\n\t"
7013 6941 "FSTP $dst" %}
7014 6942 opcode(0xDD); /* DD /0 */
7015 6943 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7016 6944 Pop_Reg_D(dst) );
7017 6945 ins_pipe( fpu_reg_mem );
7018 6946 %}
7019 6947
7020 6948 // Load Double to XMM
7021 6949 instruct loadXD(regXD dst, memory mem) %{
7022 6950 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
7023 6951 match(Set dst (LoadD mem));
7024 6952 ins_cost(145);
7025 6953 format %{ "MOVSD $dst,$mem" %}
7026 6954 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7027 6955 ins_pipe( pipe_slow );
7028 6956 %}
7029 6957
// MOVLPD variant preserves the upper XMM half (chosen when
// UseXmmLoadAndClearUpper is off).
7030 6958 instruct loadXD_partial(regXD dst, memory mem) %{
7031 6959 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
7032 6960 match(Set dst (LoadD mem));
7033 6961 ins_cost(145);
7034 6962 format %{ "MOVLPD $dst,$mem" %}
7035 6963 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
7036 6964 ins_pipe( pipe_slow );
7037 6965 %}
7038 6966
7039 6967 // Load to XMM register (single-precision floating point)
7040 6968 // MOVSS instruction
7041 6969 instruct loadX(regX dst, memory mem) %{
7042 6970 predicate(UseSSE>=1);
7043 6971 match(Set dst (LoadF mem));
7044 6972 ins_cost(145);
7045 6973 format %{ "MOVSS $dst,$mem" %}
7046 6974 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7047 6975 ins_pipe( pipe_slow );
7048 6976 %}
7049 6977
7050 6978 // Load Float
7051 6979 instruct loadF(regF dst, memory mem) %{
7052 6980 predicate(UseSSE==0);
7053 6981 match(Set dst (LoadF mem));
7054 6982
7055 6983 ins_cost(150);
7056 6984 format %{ "FLD_S ST,$mem\n\t"
7057 6985 "FSTP $dst" %}
7058 6986 opcode(0xD9); /* D9 /0 */
7059 6987 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7060 6988 Pop_Reg_F(dst) );
7061 6989 ins_pipe( fpu_reg_mem );
7062 6990 %}
7063 6991
7064 6992 // Load Aligned Packed Byte to XMM register
// The packed (SIMD) loads below all share the 64-bit MOVQ encoding.
7065 6993 instruct loadA8B(regXD dst, memory mem) %{
7066 6994 predicate(UseSSE>=1);
7067 6995 match(Set dst (Load8B mem));
7068 6996 ins_cost(125);
7069 6997 format %{ "MOVQ $dst,$mem\t! packed8B" %}
7070 6998 ins_encode( movq_ld(dst, mem));
7071 6999 ins_pipe( pipe_slow );
7072 7000 %}
7073 7001
7074 7002 // Load Aligned Packed Short to XMM register
7075 7003 instruct loadA4S(regXD dst, memory mem) %{
7076 7004 predicate(UseSSE>=1);
7077 7005 match(Set dst (Load4S mem));
7078 7006 ins_cost(125);
7079 7007 format %{ "MOVQ $dst,$mem\t! packed4S" %}
7080 7008 ins_encode( movq_ld(dst, mem));
7081 7009 ins_pipe( pipe_slow );
7082 7010 %}
7083 7011
7084 7012 // Load Aligned Packed Char to XMM register
7085 7013 instruct loadA4C(regXD dst, memory mem) %{
7086 7014 predicate(UseSSE>=1);
7087 7015 match(Set dst (Load4C mem));
7088 7016 ins_cost(125);
7089 7017 format %{ "MOVQ $dst,$mem\t! packed4C" %}
7090 7018 ins_encode( movq_ld(dst, mem));
7091 7019 ins_pipe( pipe_slow );
7092 7020 %}
7093 7021
7094 7022 // Load Aligned Packed Integer to XMM register
7095 7023 instruct load2IU(regXD dst, memory mem) %{
7096 7024 predicate(UseSSE>=1);
7097 7025 match(Set dst (Load2I mem));
7098 7026 ins_cost(125);
7099 7027 format %{ "MOVQ $dst,$mem\t! packed2I" %}
7100 7028 ins_encode( movq_ld(dst, mem));
7101 7029 ins_pipe( pipe_slow );
7102 7030 %}
7103 7031
7104 7032 // Load Aligned Packed Single to XMM
7105 7033 instruct loadA2F(regXD dst, memory mem) %{
7106 7034 predicate(UseSSE>=1);
7107 7035 match(Set dst (Load2F mem));
7108 7036 ins_cost(145);
7109 7037 format %{ "MOVQ $dst,$mem\t! packed2F" %}
7110 7038 ins_encode( movq_ld(dst, mem));
7111 7039 ins_pipe( pipe_slow );
7112 7040 %}
7113 7041
7114 7042 // Load Effective Address
// One LEA rule per addressing-mode operand (8-bit offset, 32-bit offset,
// index+offset, scaled index, scaled index+offset); bodies are identical,
// only the matched operand differs.
7115 7043 instruct leaP8(eRegP dst, indOffset8 mem) %{
7116 7044 match(Set dst mem);
7117 7045
7118 7046 ins_cost(110);
7119 7047 format %{ "LEA $dst,$mem" %}
7120 7048 opcode(0x8D);
7121 7049 ins_encode( OpcP, RegMem(dst,mem));
7122 7050 ins_pipe( ialu_reg_reg_fat );
7123 7051 %}
7124 7052
7125 7053 instruct leaP32(eRegP dst, indOffset32 mem) %{
7126 7054 match(Set dst mem);
7127 7055
7128 7056 ins_cost(110);
7129 7057 format %{ "LEA $dst,$mem" %}
7130 7058 opcode(0x8D);
7131 7059 ins_encode( OpcP, RegMem(dst,mem));
7132 7060 ins_pipe( ialu_reg_reg_fat );
7133 7061 %}
7134 7062
7135 7063 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
7136 7064 match(Set dst mem);
7137 7065
7138 7066 ins_cost(110);
7139 7067 format %{ "LEA $dst,$mem" %}
7140 7068 opcode(0x8D);
7141 7069 ins_encode( OpcP, RegMem(dst,mem));
7142 7070 ins_pipe( ialu_reg_reg_fat );
7143 7071 %}
7144 7072
7145 7073 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
7146 7074 match(Set dst mem);
7147 7075
7148 7076 ins_cost(110);
7149 7077 format %{ "LEA $dst,$mem" %}
7150 7078 opcode(0x8D);
7151 7079 ins_encode( OpcP, RegMem(dst,mem));
7152 7080 ins_pipe( ialu_reg_reg_fat );
7153 7081 %}
7154 7082
7155 7083 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
7156 7084 match(Set dst mem);
7157 7085
7158 7086 ins_cost(110);
7159 7087 format %{ "LEA $dst,$mem" %}
7160 7088 opcode(0x8D);
7161 7089 ins_encode( OpcP, RegMem(dst,mem));
7162 7090 ins_pipe( ialu_reg_reg_fat );
7163 7091 %}
7164 7092
7165 7093 // Load Constant
7166 7094 instruct loadConI(eRegI dst, immI src) %{
7167 7095 match(Set dst src);
7168 7096
7169 7097 format %{ "MOV $dst,$src" %}
7170 7098 ins_encode( LdImmI(dst, src) );
7171 7099 ins_pipe( ialu_reg_fat );
7172 7100 %}
7173 7101
7174 7102 // Load Constant zero
// XOR reg,reg is the canonical shorter/cheaper zero idiom; it clobbers
// flags, hence KILL cr (the generic loadConI does not).
7175 7103 instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
7176 7104 match(Set dst src);
7177 7105 effect(KILL cr);
7178 7106
7179 7107 ins_cost(50);
7180 7108 format %{ "XOR $dst,$dst" %}
7181 7109 opcode(0x33); /* + rd */
7182 7110 ins_encode( OpcP, RegReg( dst, dst ) );
7183 7111 ins_pipe( ialu_reg );
7184 7112 %}
7185 7113
7186 7114 instruct loadConP(eRegP dst, immP src) %{
7187 7115 match(Set dst src);
7188 7116
7189 7117 format %{ "MOV $dst,$src" %}
7190 7118 opcode(0xB8); /* + rd */
7191 7119 ins_encode( LdImmP(dst, src) );
7192 7120 ins_pipe( ialu_reg_fat );
7193 7121 %}
7194 7122
// 64-bit constant: two immediate MOVs, one per register half.
7195 7123 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
7196 7124 match(Set dst src);
7197 7125 effect(KILL cr);
7198 7126 ins_cost(200);
7199 7127 format %{ "MOV $dst.lo,$src.lo\n\t"
7200 7128 "MOV $dst.hi,$src.hi" %}
7201 7129 opcode(0xB8);
7202 7130 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
7203 7131 ins_pipe( ialu_reg_long_fat );
7204 7132 %}
7205 7133
7206 7134 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
7207 7135 match(Set dst src);
↓ open down ↓ |
2350 lines elided |
↑ open up ↑ |
7208 7136 effect(KILL cr);
7209 7137 ins_cost(150);
7210 7138 format %{ "XOR $dst.lo,$dst.lo\n\t"
7211 7139 "XOR $dst.hi,$dst.hi" %}
7212 7140 opcode(0x33,0x33);
7213 7141 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7214 7142 ins_pipe( ialu_reg_long );
7215 7143 %}
7216 7144
7217 7145 // The instruction usage is guarded by predicate in operand immF().
7218 -instruct loadConF(regF dst, immF src) %{
7219 - match(Set dst src);
7146 +instruct loadConF(regF dst, immF con) %{
7147 + match(Set dst con);
7220 7148 ins_cost(125);
7149 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
7150 + "FSTP $dst" %}
7151 + ins_encode %{
7152 + __ fld_s($constantaddress($con));
7153 + __ fstp_d($dst$$reg);
7154 + %}
7155 + ins_pipe(fpu_reg_con);
7156 +%}
7221 7157
7222 - format %{ "FLD_S ST,$src\n\t"
7158 +// The instruction usage is guarded by predicate in operand immF0().
7159 +instruct loadConF0(regF dst, immF0 con) %{
7160 + match(Set dst con);
7161 + ins_cost(125);
7162 + format %{ "FLDZ ST\n\t"
7223 7163 "FSTP $dst" %}
7224 - opcode(0xD9, 0x00); /* D9 /0 */
7225 - ins_encode(LdImmF(src), Pop_Reg_F(dst) );
7226 - ins_pipe( fpu_reg_con );
7164 + ins_encode %{
7165 + __ fldz();
7166 + __ fstp_d($dst$$reg);
7167 + %}
7168 + ins_pipe(fpu_reg_con);
7169 +%}
7170 +
7171 +// The instruction usage is guarded by predicate in operand immF1().
7172 +instruct loadConF1(regF dst, immF1 con) %{
7173 + match(Set dst con);
7174 + ins_cost(125);
7175 + format %{ "FLD1 ST\n\t"
7176 + "FSTP $dst" %}
7177 + ins_encode %{
7178 + __ fld1();
7179 + __ fstp_d($dst$$reg);
7180 + %}
7181 + ins_pipe(fpu_reg_con);
7227 7182 %}
7228 7183
7229 7184 // The instruction usage is guarded by predicate in operand immXF().
7230 7185 instruct loadConX(regX dst, immXF con) %{
7231 7186 match(Set dst con);
7232 7187 ins_cost(125);
7233 - format %{ "MOVSS $dst,[$con]" %}
7234 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
7235 - ins_pipe( pipe_slow );
7188 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7189 + ins_encode %{
7190 + __ movflt($dst$$XMMRegister, $constantaddress($con));
7191 + %}
7192 + ins_pipe(pipe_slow);
7236 7193 %}
7237 7194
7238 7195 // The instruction usage is guarded by predicate in operand immXF0().
7239 7196 instruct loadConX0(regX dst, immXF0 src) %{
7240 7197 match(Set dst src);
7241 7198 ins_cost(100);
7242 7199 format %{ "XORPS $dst,$dst\t# float 0.0" %}
7243 - ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7244 - ins_pipe( pipe_slow );
7200 + ins_encode %{
7201 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7202 + %}
7203 + ins_pipe(pipe_slow);
7245 7204 %}
7246 7205
7247 7206 // The instruction usage is guarded by predicate in operand immD().
7248 -instruct loadConD(regD dst, immD src) %{
7249 - match(Set dst src);
7207 +instruct loadConD(regD dst, immD con) %{
7208 + match(Set dst con);
7209 + ins_cost(125);
7210 +
7211 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
7212 + "FSTP $dst" %}
7213 + ins_encode %{
7214 + __ fld_d($constantaddress($con));
7215 + __ fstp_d($dst$$reg);
7216 + %}
7217 + ins_pipe(fpu_reg_con);
7218 +%}
7219 +
7220 +// The instruction usage is guarded by predicate in operand immD0().
7221 +instruct loadConD0(regD dst, immD0 con) %{
7222 + match(Set dst con);
7223 + ins_cost(125);
7224 +
7225 + format %{ "FLDZ ST\n\t"
7226 + "FSTP $dst" %}
7227 + ins_encode %{
7228 + __ fldz();
7229 + __ fstp_d($dst$$reg);
7230 + %}
7231 + ins_pipe(fpu_reg_con);
7232 +%}
7233 +
7234 +// The instruction usage is guarded by predicate in operand immD1().
7235 +instruct loadConD1(regD dst, immD1 con) %{
7236 + match(Set dst con);
7250 7237 ins_cost(125);
7251 7238
7252 - format %{ "FLD_D ST,$src\n\t"
7239 + format %{ "FLD1 ST\n\t"
7253 7240 "FSTP $dst" %}
7254 - ins_encode(LdImmD(src), Pop_Reg_D(dst) );
7255 - ins_pipe( fpu_reg_con );
7241 + ins_encode %{
7242 + __ fld1();
7243 + __ fstp_d($dst$$reg);
7244 + %}
7245 + ins_pipe(fpu_reg_con);
7256 7246 %}
7257 7247
// The instruction usage is guarded by predicate in operand immXD().
// SSE2 path: load the double constant from the nmethod's constant table
// with a single MOVSD into the XMM destination.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7266 7258
// The instruction usage is guarded by predicate in operand immXD0().
// Materialize double 0.0 by XOR-ing the XMM register with itself.
// Converted from the raw opcode-byte encoding (Opcode 0x66,0x0F,0x57)
// to MacroAssembler style for consistency with loadConX0/loadConXD in
// this change; __ xorpd emits the identical 66 0F 57 /r byte sequence.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7275 7267
// Load Stack Slot
// Integer from a stack slot into a general register (0x8B = MOV r32,r/m32).
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Long from a stack slot: two 32-bit MOVs for the low and high halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Pointer from a stack slot; same 0x8B MOV encoding as the int case.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float from a stack slot via the x87 stack: FLD m32real then FSTP to dst.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Double from a stack slot via the x87 stack: FLD m64real then FSTP to dst.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7334 7326
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
// Variant selection is driven by UseSSE / 3DNow! support and the
// ReadPrefetchInstr / AllocatePrefetchInstr flags (see each predicate).

// No SSE and no 3DNow!: read prefetch degenerates to nothing (size 0).
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCH (0F 0D /0), or forced by ReadPrefetchInstr==3.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);         /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0): non-temporal read prefetch.
instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1): prefetch into L1+L2 for read.
instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3): prefetch into L2 for read.
instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// No SSE and no 3DNow!: write prefetch degenerates to nothing (size 0).
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCHW (0F 0D /1), or forced by AllocatePrefetchInstr==3.
instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);         /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0): non-temporal prefetch for write/allocation.
instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1): prefetch into L1+L2 for write.
instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3): prefetch into L2 for write.
instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7445 7437
//----------Store Instructions-------------------------------------------------

// Store Byte (0x88 = MOV r/m8,r8)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 0x89 = 16-bit MOV)
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer (0x89 = MOV r/m32,r32)
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long: two 32-bit MOVs (non-atomic; atomic case handled below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low 32 bits are stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7504 7496
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The leading CMP (0x3B) against the memory operand triggers the
// implicit null check before the FP sequence commits the store.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp (single 64-bit MOVSD).
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the source already in GP registers: assemble the
// 64-bit value in an XMM temp (MOVD lo, MOVD hi, PUNPCKLDQ), then MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}
7549 7541
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate (C7 /0 = MOV r/m32,imm32)
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate; gated on UseStoreImmI16 because 16-bit
// immediate stores are slow on some CPUs.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate (C6 /0 = MOV r/m8,imm8)
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7606 7598
// Store Aligned Packed Byte XMM register to memory (64-bit MOVQ)
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory (64-bit MOVQ)
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory (64-bit MOVQ)
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate (byte store, C6 /0)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7647 7639
// Store Double (x87: FST m64real from the FP top-of-stack)
instruct storeD( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: the 64-bit memory store itself
// rounds the 80-bit x87 value, so RoundDouble folds into the store.
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction (F2 0F 11)
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}
7682 7674
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction (F3 0F 11)
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory (64-bit MOVQ)
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7703 7695
// Store Float (x87: FST m32real from the FP top-of-stack)
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: the 32-bit memory store rounds,
// so RoundFloat folds into the store.
instruct storeF_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: ConvD2F folds into the 32-bit store,
// which narrows the double to float as a side effect of the store.
instruct storeF_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immF().
// Stores the raw float bit pattern with an integer MOV (C7 /0).
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
7763 7755
// Store Integer to stack slot (0x89 = MOV r/m32,r32)
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot; same encoding as the int case.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit MOVs for the low and high halves.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
7797 7789
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86 only StoreLoad needs a real instruction;
// acquire/release orderings are satisfied by the hardware memory model,
// so those barriers emit nothing (size 0).

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire paired with a preceding FastLock: the CMPXCHG in the lock
// sequence already provides the ordering, so this is empty.
instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release immediately followed by a FastUnlock: the unlock provides
// the ordering, so this is empty.
instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; on MP emitted as LOCK ADDL [ESP], 0
// (see MacroAssembler::membar), hence the KILL of the flags.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A StoreLoad barrier proven redundant by a preceding locked instruction.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
7872 7864
//----------Move Instructions--------------------------------------------------
// CastX2P: machine-word to pointer.  Both operands are pinned to EAX,
// so no code is needed — it is a pure re-typing.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X: pointer to machine word — a plain register copy.
instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
7889 7881
//----------Conditional Move---------------------------------------------------
// Conditional move (CMOVcc = 0F 40+cc, P6 and later only)
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare flavor of the register CMOV.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) flavor expands to the unsigned version.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (folds the LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move, unsigned compare, memory source.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7951 7943
// Conditional move of a pointer (CMOVcc, P6 and later).
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here: branch around a plain MOV (0x8B).
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move, unsigned compare.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
8024 8016
// Conditional move of an x87 double using FCMOVcc (0xDA family);
// requires unsigned/unordered flags — see the signed workaround below.
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move of an x87 float; same FCMOVcc encoding as the double.
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned:
// emulate with an inverted branch around a register-to-register FP move.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSS instead.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSD instead.
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
8108 8100
8109 8101 // unsigned version
// Conditional move of a float in an SSE register on an unsigned comparison:
// branch (sense inverted) around a MOVSS.
8110 8102 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
8111 8103 predicate (UseSSE>=1);
8112 8104 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8113 8105 ins_cost(200);
8114 8106 format %{ "Jn$cop skip\n\t"
8115 8107 "MOVSS $dst,$src\t# float\n"
8116 8108 "skip:" %}
8117 8109 ins_encode %{
8118 8110 Label skip;
8119 8111 // Invert sense of branch from sense of CMOV
8120 8112 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8121 8113 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8122 8114 __ bind(skip);
8123 8115 %}
8124 8116 ins_pipe( pipe_slow );
8125 8117 %}
8126 8118
// UCF (unordered-comparison flags) variant: delegates to the unsigned form.
8127 8119 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
8128 8120 predicate (UseSSE>=1);
8129 8121 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8130 8122 ins_cost(200);
8131 8123 expand %{
8132 8124 fcmovX_regU(cop, cr, dst, src);
8133 8125 %}
8134 8126 %}
8135 8127
8136 8128 // unsigned version
// Conditional move of a double in an SSE2 register on an unsigned
// comparison: branch (sense inverted) around a MOVSD.
// Fix: format string said "# float" but this is a double move (regXD
// operands, movdbl/MOVSD) — corrected to "# double" so disassembly output
// is not misleading.
8137 8129 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
8138 8130 predicate (UseSSE>=2);
8139 8131 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8140 8132 ins_cost(200);
8141 8133 format %{ "Jn$cop skip\n\t"
8142 8134 "MOVSD $dst,$src\t# double\n"
8143 8135 "skip:" %}
8144 8136 ins_encode %{
8145 8137 Label skip;
8146 8138 // Invert sense of branch from sense of CMOV
8147 8139 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8148 8140 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8149 8141 __ bind(skip);
8150 8142 %}
8151 8143 ins_pipe( pipe_slow );
8152 8144 %}
8153 8145
// UCF (unordered-comparison flags) variant of the SSE2 double conditional
// move: delegates to the unsigned form.
8154 8146 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8155 8147 predicate (UseSSE>=2);
8156 8148 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8157 8149 ins_cost(200);
8158 8150 expand %{
8159 8151 fcmovXD_regU(cop, cr, dst, src);
8160 8152 %}
8161 8153 %}
8162 8154
// Conditional move of a long: on 32-bit x86 a long lives in a register
// pair, so two CMOVcc instructions are emitted (low then high word).
// Requires hardware CMOV support.
8163 8155 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8164 8156 predicate(VM_Version::supports_cmov() );
8165 8157 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8166 8158 ins_cost(200);
8167 8159 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8168 8160 "CMOV$cop $dst.hi,$src.hi" %}
8169 8161 opcode(0x0F,0x40);
8170 8162 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8171 8163 ins_pipe( pipe_cmov_reg_long );
8172 8164 %}
8173 8165
// Unsigned-comparison variant of the long conditional move.
8174 8166 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8175 8167 predicate(VM_Version::supports_cmov() );
8176 8168 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8177 8169 ins_cost(200);
8178 8170 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8179 8171 "CMOV$cop $dst.hi,$src.hi" %}
8180 8172 opcode(0x0F,0x40);
8181 8173 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8182 8174 ins_pipe( pipe_cmov_reg_long );
8183 8175 %}
8184 8176
// UCF variant: delegates to the unsigned long conditional move.
8185 8177 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8186 8178 predicate(VM_Version::supports_cmov() );
8187 8179 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8188 8180 ins_cost(200);
8189 8181 expand %{
8190 8182 cmovL_regU(cop, cr, dst, src);
8191 8183 %}
8192 8184 %}
8193 8185
8194 8186 //----------Arithmetic Instructions--------------------------------------------
8195 8187 //----------Addition Instructions----------------------------------------------
8196 8188 // Integer Addition Instructions
// Register-register integer add (ADD r32, r/m32). Clobbers EFLAGS.
8197 8189 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8198 8190 match(Set dst (AddI dst src));
8199 8191 effect(KILL cr);
8200 8192
8201 8193 size(2);
8202 8194 format %{ "ADD $dst,$src" %}
8203 8195 opcode(0x03);
8204 8196 ins_encode( OpcP, RegReg( dst, src) );
8205 8197 ins_pipe( ialu_reg_reg );
8206 8198 %}
8207 8199
// Register-immediate integer add; OpcSErm/Con8or32 pick the sign-extended
// 8-bit form when the immediate fits in a byte.
8208 8200 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8209 8201 match(Set dst (AddI dst src));
8210 8202 effect(KILL cr);
8211 8203
8212 8204 format %{ "ADD $dst,$src" %}
8213 8205 opcode(0x81, 0x00); /* /0 id */
8214 8206 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215 8207 ins_pipe( ialu_reg );
8216 8208 %}
8217 8209
// Add of constant 1 as a one-byte INC (opcode 0x40+reg), gated by UseIncDec.
8218 8210 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8219 8211 predicate(UseIncDec);
8220 8212 match(Set dst (AddI dst src));
8221 8213 effect(KILL cr);
8222 8214
8223 8215 size(1);
8224 8216 format %{ "INC $dst" %}
8225 8217 opcode(0x40); /* */
8226 8218 ins_encode( Opc_plus( primary, dst ) );
8227 8219 ins_pipe( ialu_reg );
8228 8220 %}
8229 8221
// Three-operand add via LEA: does not clobber flags (no KILL cr).
8230 8222 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8231 8223 match(Set dst (AddI src0 src1));
8232 8224 ins_cost(110);
8233 8225
8234 8226 format %{ "LEA $dst,[$src0 + $src1]" %}
8235 8227 opcode(0x8D); /* 0x8D /r */
8236 8228 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8237 8229 ins_pipe( ialu_reg_reg );
8238 8230 %}
8239 8231
// Pointer + immediate via LEA, also flags-preserving.
8240 8232 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8241 8233 match(Set dst (AddP src0 src1));
8242 8234 ins_cost(110);
8243 8235
8244 8236 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8245 8237 opcode(0x8D); /* 0x8D /r */
8246 8238 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8247 8239 ins_pipe( ialu_reg_reg );
8248 8240 %}
8249 8241
// Add of constant -1 as a one-byte DEC (opcode 0x48+reg), gated by UseIncDec.
8250 8242 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8251 8243 predicate(UseIncDec);
8252 8244 match(Set dst (AddI dst src));
8253 8245 effect(KILL cr);
8254 8246
8255 8247 size(1);
8256 8248 format %{ "DEC $dst" %}
8257 8249 opcode(0x48); /* */
8258 8250 ins_encode( Opc_plus( primary, dst ) );
8259 8251 ins_pipe( ialu_reg );
8260 8252 %}
8261 8253
// Pointer addition instructions: plain integer ADD is used since pointers
// are 32-bit on this platform. Clobbers EFLAGS.
8262 8254 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8263 8255 match(Set dst (AddP dst src));
8264 8256 effect(KILL cr);
8265 8257
8266 8258 size(2);
8267 8259 format %{ "ADD $dst,$src" %}
8268 8260 opcode(0x03);
8269 8261 ins_encode( OpcP, RegReg( dst, src) );
8270 8262 ins_pipe( ialu_reg_reg );
8271 8263 %}
8272 8264
// Pointer + immediate; short sign-extended immediate form used when possible.
8273 8265 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8274 8266 match(Set dst (AddP dst src));
8275 8267 effect(KILL cr);
8276 8268
8277 8269 format %{ "ADD $dst,$src" %}
8278 8270 opcode(0x81,0x00); /* Opcode 81 /0 id */
8279 8271 // ins_encode( RegImm( dst, src) );
8280 8272 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8281 8273 ins_pipe( ialu_reg );
8282 8274 %}
8283 8275
// Register += memory (ADD r32, m32).
8284 8276 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8285 8277 match(Set dst (AddI dst (LoadI src)));
8286 8278 effect(KILL cr);
8287 8279
8288 8280 ins_cost(125);
8289 8281 format %{ "ADD $dst,$src" %}
8290 8282 opcode(0x03);
8291 8283 ins_encode( OpcP, RegMem( dst, src) );
8292 8284 ins_pipe( ialu_reg_mem );
8293 8285 %}
8294 8286
// Memory += register: matches the load-add-store ideal subtree as a single
// read-modify-write ADD m32, r32.
8295 8287 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8296 8288 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8297 8289 effect(KILL cr);
8298 8290
8299 8291 ins_cost(150);
8300 8292 format %{ "ADD $dst,$src" %}
8301 8293 opcode(0x01); /* Opcode 01 /r */
8302 8294 ins_encode( OpcP, RegMem( src, dst ) );
8303 8295 ins_pipe( ialu_mem_reg );
8304 8296 %}
8305 8297
8306 8298 // Add Memory with Immediate
8307 8299 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8308 8300 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8309 8301 effect(KILL cr);
8310 8302
8311 8303 ins_cost(125);
8312 8304 format %{ "ADD $dst,$src" %}
8313 8305 opcode(0x81); /* Opcode 81 /0 id */
8314 8306 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8315 8307 ins_pipe( ialu_mem_imm );
8316 8308 %}
8317 8309
// Memory += 1 encoded as INC m32 (FF /0).
8318 8310 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8319 8311 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8320 8312 effect(KILL cr);
8321 8313
8322 8314 ins_cost(125);
8323 8315 format %{ "INC $dst" %}
8324 8316 opcode(0xFF); /* Opcode FF /0 */
8325 8317 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8326 8318 ins_pipe( ialu_mem_imm );
8327 8319 %}
8328 8320
// Memory += -1 encoded as DEC m32 (FF /1).
8329 8321 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8330 8322 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8331 8323 effect(KILL cr);
8332 8324
8333 8325 ins_cost(125);
8334 8326 format %{ "DEC $dst" %}
8335 8327 opcode(0xFF); /* Opcode FF /1 */
8336 8328 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8337 8329 ins_pipe( ialu_mem_imm );
8338 8330 %}
8339 8331
8340 8332
// Compiler-only type casts: these emit no machine code (size 0 / empty
// encoding); they exist purely to carry type information in the ideal graph.
8341 8333 instruct checkCastPP( eRegP dst ) %{
8342 8334 match(Set dst (CheckCastPP dst));
8343 8335
8344 8336 size(0);
8345 8337 format %{ "#checkcastPP of $dst" %}
8346 8338 ins_encode( /*empty encoding*/ );
8347 8339 ins_pipe( empty );
8348 8340 %}
8349 8341
8350 8342 instruct castPP( eRegP dst ) %{
8351 8343 match(Set dst (CastPP dst));
8352 8344 format %{ "#castPP of $dst" %}
8353 8345 ins_encode( /*empty encoding*/ );
8354 8346 ins_pipe( empty );
8355 8347 %}
8356 8348
8357 8349 instruct castII( eRegI dst ) %{
8358 8350 match(Set dst (CastII dst));
8359 8351 format %{ "#castII of $dst" %}
8360 8352 ins_encode( /*empty encoding*/ );
8361 8353 ins_cost(0);
8362 8354 ins_pipe( empty );
8363 8355 %}
8364 8356
8365 8357
8366 8358 // Load-locked - same as a regular pointer load when used with compare-swap
8367 8359 instruct loadPLocked(eRegP dst, memory mem) %{
8368 8360 match(Set dst (LoadPLocked mem));
8369 8361
8370 8362 ins_cost(125);
8371 8363 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
8372 8364 opcode(0x8B);
8373 8365 ins_encode( OpcP, RegMem(dst,mem));
8374 8366 ins_pipe( ialu_reg_mem );
8375 8367 %}
8376 8368
8377 8369 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 path (UseSSE<=1): FILD/FISTP gives a single atomic 64-bit transfer.
8378 8370 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8379 8371 predicate(UseSSE<=1);
8380 8372 match(Set dst (LoadLLocked mem));
8381 8373
8382 8374 ins_cost(200);
8383 8375 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
8384 8376 "FISTp $dst" %}
8385 8377 ins_encode(enc_loadL_volatile(mem,dst));
8386 8378 ins_pipe( fpu_reg_mem );
8387 8379 %}
8388 8380
// SSE2 path to a stack slot: one atomic MOVSD load, then store to the slot.
8389 8381 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8390 8382 predicate(UseSSE>=2);
8391 8383 match(Set dst (LoadLLocked mem));
8392 8384 effect(TEMP tmp);
8393 8385 ins_cost(180);
8394 8386 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8395 8387 "MOVSD $dst,$tmp" %}
8396 8388 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8397 8389 ins_pipe( pipe_slow );
8398 8390 %}
8399 8391
// SSE2 path to a GPR pair: atomic MOVSD, then split the XMM value into
// dst.lo / dst.hi with MOVD + PSRLQ.
8400 8392 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8401 8393 predicate(UseSSE>=2);
8402 8394 match(Set dst (LoadLLocked mem));
8403 8395 effect(TEMP tmp);
8404 8396 ins_cost(160);
8405 8397 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8406 8398 "MOVD $dst.lo,$tmp\n\t"
8407 8399 "PSRLQ $tmp,32\n\t"
8408 8400 "MOVD $dst.hi,$tmp" %}
8409 8401 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8410 8402 ins_pipe( pipe_slow );
8411 8403 %}
8412 8404
8413 8405 // Conditional-store of the updated heap-top.
8414 8406 // Used during allocation of the shared heap.
8415 8407 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
8416 8408 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
8417 8409 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8418 8410 // EAX is killed if there is contention, but then it's also unused.
8419 8411 // In the common case of no contention, EAX holds the new oop address.
8420 8412 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
8421 8413 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
8422 8414 ins_pipe( pipe_cmpxchg );
8423 8415 %}
8424 8416
8425 8417 // Conditional-store of an int value.
8426 8418 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8427 8419 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8428 8420 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8429 8421 effect(KILL oldval);
8430 8422 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8431 8423 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8432 8424 ins_pipe( pipe_cmpxchg );
8433 8425 %}
8434 8426
8435 8427 // Conditional-store of a long value.
8436 8428 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
8437 8429 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8438 8430 match(Set cr (StoreLConditional mem (Binary oldval newval)));
8439 8431 effect(KILL oldval);
8440 8432 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8441 8433 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8442 8434 "XCHG EBX,ECX"
8443 8435 %}
8444 8436 ins_encode %{
8445 8437 // Note: we need to swap rbx, and rcx before and after the
8446 8438 // cmpxchg8 instruction because the instruction uses
8447 8439 // rcx as the high order word of the new value to store but
8448 8440 // our register encoding uses rbx.
8449 8441 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8450 8442 if( os::is_MP() )
8451 8443 __ lock();
8452 8444 __ cmpxchg8($mem$$Address);
8453 8445 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8454 8446 %}
8455 8447 ins_pipe( pipe_cmpxchg );
8456 8448 %}
8457 8449
8458 8450 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8459 8451
// 64-bit CAS via LOCK CMPXCHG8B: expected value in EDX:EAX, new value in
// EBX:ECX; res gets 0/1 derived from ZF.
8460 8452 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8461 8453 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8462 8454 effect(KILL cr, KILL oldval);
8463 8455 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8464 8456 "MOV $res,0\n\t"
8465 8457 "JNE,s fail\n\t"
8466 8458 "MOV $res,1\n"
8467 8459 "fail:" %}
8468 8460 ins_encode( enc_cmpxchg8(mem_ptr),
8469 8461 enc_flags_ne_to_boolean(res) );
8470 8462 ins_pipe( pipe_cmpxchg );
8471 8463 %}
8472 8464
// Pointer CAS via LOCK CMPXCHG: expected value in EAX, new value in ECX.
8473 8465 instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
8474 8466 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8475 8467 effect(KILL cr, KILL oldval);
8476 8468 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8477 8469 "MOV $res,0\n\t"
8478 8470 "JNE,s fail\n\t"
8479 8471 "MOV $res,1\n"
8480 8472 "fail:" %}
8481 8473 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8482 8474 ins_pipe( pipe_cmpxchg );
8483 8475 %}
8484 8476
// Int CAS via LOCK CMPXCHG: same shape as the pointer form.
8485 8477 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
8486 8478 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8487 8479 effect(KILL cr, KILL oldval);
8488 8480 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8489 8481 "MOV $res,0\n\t"
8490 8482 "JNE,s fail\n\t"
8491 8483 "MOV $res,1\n"
8492 8484 "fail:" %}
8493 8485 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8494 8486 ins_pipe( pipe_cmpxchg );
8495 8487 %}
8496 8488
8497 8489 //----------Subtraction Instructions-------------------------------------------
8498 8490 // Integer Subtraction Instructions
// Register-register subtract (SUB r32, r/m32). Clobbers EFLAGS.
8499 8491 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8500 8492 match(Set dst (SubI dst src));
8501 8493 effect(KILL cr);
8502 8494
8503 8495 size(2);
8504 8496 format %{ "SUB $dst,$src" %}
8505 8497 opcode(0x2B);
8506 8498 ins_encode( OpcP, RegReg( dst, src) );
8507 8499 ins_pipe( ialu_reg_reg );
8508 8500 %}
8509 8501
// Register-immediate subtract (81 /5); short sign-extended form when possible.
8510 8502 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8511 8503 match(Set dst (SubI dst src));
8512 8504 effect(KILL cr);
8513 8505
8514 8506 format %{ "SUB $dst,$src" %}
8515 8507 opcode(0x81,0x05); /* Opcode 81 /5 */
8516 8508 // ins_encode( RegImm( dst, src) );
8517 8509 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8518 8510 ins_pipe( ialu_reg );
8519 8511 %}
8520 8512
// Register -= memory.
8521 8513 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8522 8514 match(Set dst (SubI dst (LoadI src)));
8523 8515 effect(KILL cr);
8524 8516
8525 8517 ins_cost(125);
8526 8518 format %{ "SUB $dst,$src" %}
8527 8519 opcode(0x2B);
8528 8520 ins_encode( OpcP, RegMem( dst, src) );
8529 8521 ins_pipe( ialu_reg_mem );
8530 8522 %}
8531 8523
// Memory -= register: load-sub-store subtree as one RMW SUB m32, r32.
8532 8524 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8533 8525 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8534 8526 effect(KILL cr);
8535 8527
8536 8528 ins_cost(150);
8537 8529 format %{ "SUB $dst,$src" %}
8538 8530 opcode(0x29); /* Opcode 29 /r */
8539 8531 ins_encode( OpcP, RegMem( src, dst ) );
8540 8532 ins_pipe( ialu_mem_reg );
8541 8533 %}
8542 8534
8543 8535 // Subtract from a pointer
// Matches ptr + (0 - src) and emits a plain SUB.
8544 8536 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8545 8537 match(Set dst (AddP dst (SubI zero src)));
8546 8538 effect(KILL cr);
8547 8539
8548 8540 size(2);
8549 8541 format %{ "SUB $dst,$src" %}
8550 8542 opcode(0x2B);
8551 8543 ins_encode( OpcP, RegReg( dst, src) );
8552 8544 ins_pipe( ialu_reg_reg );
8553 8545 %}
8554 8546
// Negation: 0 - dst emitted as NEG r32 (F7 /3).
8555 8547 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
8556 8548 match(Set dst (SubI zero dst));
8557 8549 effect(KILL cr);
8558 8550
8559 8551 size(2);
8560 8552 format %{ "NEG $dst" %}
8561 8553 opcode(0xF7,0x03); // Opcode F7 /3
8562 8554 ins_encode( OpcP, RegOpc( dst ) );
8563 8555 ins_pipe( ialu_reg );
8564 8556 %}
8565 8557
8566 8558
8567 8559 //----------Multiplication/Division Instructions-------------------------------
8568 8560 // Integer Multiplication Instructions
8569 8561 // Multiply Register
// Two-operand signed multiply: IMUL r32, r/m32 (0F AF).
8570 8562 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8571 8563 match(Set dst (MulI dst src));
8572 8564 effect(KILL cr);
8573 8565
8574 8566 size(3);
8575 8567 ins_cost(300);
8576 8568 format %{ "IMUL $dst,$src" %}
8577 8569 opcode(0xAF, 0x0F);
8578 8570 ins_encode( OpcS, OpcP, RegReg( dst, src) );
8579 8571 ins_pipe( ialu_reg_reg_alu0 );
8580 8572 %}
8581 8573
8582 8574 // Multiply 32-bit Immediate
// Three-operand form IMUL r32, r/m32, imm (69 /r id) — dst may differ from src.
8583 8575 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
8584 8576 match(Set dst (MulI src imm));
8585 8577 effect(KILL cr);
8586 8578
8587 8579 ins_cost(300);
8588 8580 format %{ "IMUL $dst,$src,$imm" %}
8589 8581 opcode(0x69); /* 69 /r id */
8590 8582 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8591 8583 ins_pipe( ialu_reg_reg_alu0 );
8592 8584 %}
8593 8585
// Load a 32-bit-representable long constant into EAX only (low word);
// used as the constant operand of the mulI_imm_high patterns below.
8594 8586 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
8595 8587 match(Set dst src);
8596 8588 effect(KILL cr);
8597 8589
8598 8590 // Note that this is artificially increased to make it more expensive than loadConL
8599 8591 ins_cost(250);
8600 8592 format %{ "MOV EAX,$src\t// low word only" %}
8601 8593 opcode(0xB8);
8602 8594 ins_encode( LdImmL_Lo(dst, src) );
8603 8595 ins_pipe( ialu_reg_fat );
8604 8596 %}
8605 8597
8606 8598 // Multiply by 32-bit Immediate, taking the shifted high order results
8607 8599 // (special case for shift by 32)
// The predicate walks the matched subtree to require that the long constant
// operand actually fits in 32 bits.
8608 8600 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8609 8601 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8610 8602 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8611 8603 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8612 8604 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8613 8605 effect(USE src1, KILL cr);
8614 8606
8615 8607 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8616 8608 ins_cost(0*100 + 1*400 - 150);
8617 8609 format %{ "IMUL EDX:EAX,$src1" %}
8618 8610 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8619 8611 ins_pipe( pipe_slow );
8620 8612 %}
8621 8613
8622 8614 // Multiply by 32-bit Immediate, taking the shifted high order results
// General shift-count variant (32..63): IMUL then SAR of EDX by (cnt-32).
8623 8615 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8624 8616 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8625 8617 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8626 8618 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8627 8619 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8628 8620 effect(USE src1, KILL cr);
8629 8621
8630 8622 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8631 8623 ins_cost(1*100 + 1*400 - 150);
8632 8624 format %{ "IMUL EDX:EAX,$src1\n\t"
8633 8625 "SAR EDX,$cnt-32" %}
8634 8626 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8635 8627 ins_pipe( pipe_slow );
8636 8628 %}
8637 8629
8638 8630 // Multiply Memory 32-bit Immediate
// IMUL r32, m32, imm (69 /r id).
8639 8631 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8640 8632 match(Set dst (MulI (LoadI src) imm));
8641 8633 effect(KILL cr);
8642 8634
8643 8635 ins_cost(300);
8644 8636 format %{ "IMUL $dst,$src,$imm" %}
8645 8637 opcode(0x69); /* 69 /r id */
8646 8638 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8647 8639 ins_pipe( ialu_reg_mem_alu0 );
8648 8640 %}
8649 8641
8650 8642 // Multiply Memory
// Register *= memory: IMUL r32, m32 (0F AF).
8651 8643 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8652 8644 match(Set dst (MulI dst (LoadI src)));
8653 8645 effect(KILL cr);
8654 8646
8655 8647 ins_cost(350);
8656 8648 format %{ "IMUL $dst,$src" %}
8657 8649 opcode(0xAF, 0x0F);
8658 8650 ins_encode( OpcS, OpcP, RegMem( dst, src) );
8659 8651 ins_pipe( ialu_reg_mem_alu0 );
8660 8652 %}
8661 8653
8662 8654 // Multiply Register Int to Long
// Widening signed multiply: one-operand IMUL producing EDX:EAX.
8663 8655 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8664 8656 // Basic Idea: long = (long)int * (long)int
8665 8657 match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8666 8658 effect(DEF dst, USE src, USE src1, KILL flags);
8667 8659
8668 8660 ins_cost(300);
8669 8661 format %{ "IMUL $dst,$src1" %}
8670 8662
8671 8663 ins_encode( long_int_multiply( dst, src1 ) );
8672 8664 ins_pipe( ialu_reg_reg_alu0 );
8673 8665 %}
8674 8666
// Widening unsigned multiply of zero-extended ints: one-operand MUL
// producing EDX:EAX.
8675 8667 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8676 8668 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
8677 8669 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8678 8670 effect(KILL flags);
8679 8671
8680 8672 ins_cost(300);
8681 8673 format %{ "MUL $dst,$src1" %}
8682 8674
8683 8675 ins_encode( long_uint_multiply(dst, src1) );
8684 8676 ins_pipe( ialu_reg_reg_alu0 );
8685 8677 %}
8686 8678
8687 8679 // Multiply Register Long
// Full 64x64->64 multiply on a 32-bit machine: three 32-bit multiplies
// plus adds to form the high word.
8688 8680 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8689 8681 match(Set dst (MulL dst src));
8690 8682 effect(KILL cr, TEMP tmp);
8691 8683 ins_cost(4*100+3*400);
8692 8684 // Basic idea: lo(result) = lo(x_lo * y_lo)
8693 8685 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8694 8686 format %{ "MOV $tmp,$src.lo\n\t"
8695 8687 "IMUL $tmp,EDX\n\t"
8696 8688 "MOV EDX,$src.hi\n\t"
8697 8689 "IMUL EDX,EAX\n\t"
8698 8690 "ADD $tmp,EDX\n\t"
8699 8691 "MUL EDX:EAX,$src.lo\n\t"
8700 8692 "ADD EDX,$tmp" %}
8701 8693 ins_encode( long_multiply( dst, src, tmp ) );
8702 8694 ins_pipe( pipe_slow );
8703 8695 %}
8704 8696
8705 8697 // Multiply Register Long where the left operand's high 32 bits are zero
// Cheaper form: the x_hi*y_lo cross term is known to be zero.
8706 8698 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8707 8699 predicate(is_operand_hi32_zero(n->in(1)));
8708 8700 match(Set dst (MulL dst src));
8709 8701 effect(KILL cr, TEMP tmp);
8710 8702 ins_cost(2*100+2*400);
8711 8703 // Basic idea: lo(result) = lo(x_lo * y_lo)
8712 8704 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8713 8705 format %{ "MOV $tmp,$src.hi\n\t"
8714 8706 "IMUL $tmp,EAX\n\t"
8715 8707 "MUL EDX:EAX,$src.lo\n\t"
8716 8708 "ADD EDX,$tmp" %}
8717 8709 ins_encode %{
8718 8710 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8719 8711 __ imull($tmp$$Register, rax);
8720 8712 __ mull($src$$Register);
8721 8713 __ addl(rdx, $tmp$$Register);
8722 8714 %}
8723 8715 ins_pipe( pipe_slow );
8724 8716 %}
8725 8717
8726 8718 // Multiply Register Long where the right operand's high 32 bits are zero
// Mirror of the lhi0 form: the x_lo*y_hi cross term is zero.
8727 8719 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8728 8720 predicate(is_operand_hi32_zero(n->in(2)));
8729 8721 match(Set dst (MulL dst src));
8730 8722 effect(KILL cr, TEMP tmp);
8731 8723 ins_cost(2*100+2*400);
8732 8724 // Basic idea: lo(result) = lo(x_lo * y_lo)
8733 8725 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8734 8726 format %{ "MOV $tmp,$src.lo\n\t"
8735 8727 "IMUL $tmp,EDX\n\t"
8736 8728 "MUL EDX:EAX,$src.lo\n\t"
8737 8729 "ADD EDX,$tmp" %}
8738 8730 ins_encode %{
8739 8731 __ movl($tmp$$Register, $src$$Register);
8740 8732 __ imull($tmp$$Register, rdx);
8741 8733 __ mull($src$$Register);
8742 8734 __ addl(rdx, $tmp$$Register);
8743 8735 %}
8744 8736 ins_pipe( pipe_slow );
8745 8737 %}
8746 8738
8747 8739 // Multiply Register Long where the left and the right operands' high 32 bits are zero
// Cheapest form: single unsigned MUL suffices, no cross terms.
8748 8740 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
8749 8741 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8750 8742 match(Set dst (MulL dst src));
8751 8743 effect(KILL cr);
8752 8744 ins_cost(1*400);
8753 8745 // Basic idea: lo(result) = lo(x_lo * y_lo)
8754 8746 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8755 8747 format %{ "MUL EDX:EAX,$src.lo\n\t" %}
8756 8748 ins_encode %{
8757 8749 __ mull($src$$Register);
8758 8750 %}
8759 8751 ins_pipe( pipe_slow );
8760 8752 %}
8761 8753
8762 8754 // Multiply Register Long by small constant
// Constant fits in 8 bits (immL_127): IMUL-by-immediate for the high part.
8763 8755 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
8764 8756 match(Set dst (MulL dst src));
8765 8757 effect(KILL cr, TEMP tmp);
8766 8758 ins_cost(2*100+2*400);
8767 8759 size(12);
8768 8760 // Basic idea: lo(result) = lo(src * EAX)
8769 8761 // hi(result) = hi(src * EAX) + lo(src * EDX)
8770 8762 format %{ "IMUL $tmp,EDX,$src\n\t"
8771 8763 "MOV EDX,$src\n\t"
8772 8764 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
8773 8765 "ADD EDX,$tmp" %}
8774 8766 ins_encode( long_multiply_con( dst, src, tmp ) );
8775 8767 ins_pipe( pipe_slow );
8776 8768 %}
8777 8769
8778 8770 // Integer DIV with Register
// Signed divide with explicit guard for the min_jint / -1 overflow case
// (IDIV would fault); that case yields quotient min_jint, remainder 0.
8779 8771 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8780 8772 match(Set rax (DivI rax div));
8781 8773 effect(KILL rdx, KILL cr);
8782 8774 size(26);
8783 8775 ins_cost(30*100+10*100);
8784 8776 format %{ "CMP EAX,0x80000000\n\t"
8785 8777 "JNE,s normal\n\t"
8786 8778 "XOR EDX,EDX\n\t"
8787 8779 "CMP ECX,-1\n\t"
8788 8780 "JE,s done\n"
8789 8781 "normal: CDQ\n\t"
8790 8782 "IDIV $div\n\t"
8791 8783 "done:" %}
8792 8784 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8793 8785 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8794 8786 ins_pipe( ialu_reg_reg_alu0 );
8795 8787 %}
8796 8788
8797 8789 // Divide Register Long
// No 64-bit divide instruction on 32-bit x86: call the SharedRuntime::ldiv
// runtime stub with both longs pushed on the stack.
8798 8790 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8799 8791 match(Set dst (DivL src1 src2));
8800 8792 effect( KILL cr, KILL cx, KILL bx );
8801 8793 ins_cost(10000);
8802 8794 format %{ "PUSH $src1.hi\n\t"
8803 8795 "PUSH $src1.lo\n\t"
8804 8796 "PUSH $src2.hi\n\t"
8805 8797 "PUSH $src2.lo\n\t"
8806 8798 "CALL SharedRuntime::ldiv\n\t"
8807 8799 "ADD ESP,16" %}
8808 8800 ins_encode( long_div(src1,src2) );
8809 8801 ins_pipe( pipe_slow );
8810 8802 %}
8811 8803
8812 8804 // Integer DIVMOD with Register, both quotient and mod results
// Single IDIV producing quotient in EAX and remainder in EDX, with the
// same min_jint / -1 guard as divI_eReg.
8813 8805 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8814 8806 match(DivModI rax div);
8815 8807 effect(KILL cr);
8816 8808 size(26);
8817 8809 ins_cost(30*100+10*100);
8818 8810 format %{ "CMP EAX,0x80000000\n\t"
8819 8811 "JNE,s normal\n\t"
8820 8812 "XOR EDX,EDX\n\t"
8821 8813 "CMP ECX,-1\n\t"
8822 8814 "JE,s done\n"
8823 8815 "normal: CDQ\n\t"
8824 8816 "IDIV $div\n\t"
8825 8817 "done:" %}
8826 8818 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8827 8819 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8828 8820 ins_pipe( pipe_slow );
8829 8821 %}
8830 8822
8831 8823 // Integer MOD with Register
// Remainder via CDQ + IDIV; result is the EDX half.
8832 8824 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8833 8825 match(Set rdx (ModI rax div));
8834 8826 effect(KILL rax, KILL cr);
8835 8827
8836 8828 size(26);
8837 8829 ins_cost(300);
8838 8830 format %{ "CDQ\n\t"
8839 8831 "IDIV $div" %}
8840 8832 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8841 8833 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8842 8834 ins_pipe( ialu_reg_reg_alu0 );
8843 8835 %}
8844 8836
8845 8837 // Remainder Register Long
// Like divL_eReg: call the SharedRuntime::lrem runtime stub.
8846 8838 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8847 8839 match(Set dst (ModL src1 src2));
8848 8840 effect( KILL cr, KILL cx, KILL bx );
8849 8841 ins_cost(10000);
8850 8842 format %{ "PUSH $src1.hi\n\t"
8851 8843 "PUSH $src1.lo\n\t"
8852 8844 "PUSH $src2.hi\n\t"
8853 8845 "PUSH $src2.lo\n\t"
8854 8846 "CALL SharedRuntime::lrem\n\t"
8855 8847 "ADD ESP,16" %}
8856 8848 ins_encode( long_mod(src1,src2) );
8857 8849 ins_pipe( pipe_slow );
8858 8850 %}
8859 8851
8860 8852 // Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, inline instead of a runtime call.
// Strategy: divide by |imm| using unsigned DIV (fast path when the high
// word is already smaller than the divisor), negating dividend/result as
// needed to recover signed semantics.
8861 8853 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8862 8854 match(Set dst (DivL dst imm));
8863 8855 effect( TEMP tmp, TEMP tmp2, KILL cr );
8864 8856 ins_cost(1000);
8865 8857 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866 8858 "XOR $tmp2,$tmp2\n\t"
8867 8859 "CMP $tmp,EDX\n\t"
8868 8860 "JA,s fast\n\t"
8869 8861 "MOV $tmp2,EAX\n\t"
8870 8862 "MOV EAX,EDX\n\t"
8871 8863 "MOV EDX,0\n\t"
8872 8864 "JLE,s pos\n\t"
8873 8865 "LNEG EAX : $tmp2\n\t"
8874 8866 "DIV $tmp # unsigned division\n\t"
8875 8867 "XCHG EAX,$tmp2\n\t"
8876 8868 "DIV $tmp\n\t"
8877 8869 "LNEG $tmp2 : EAX\n\t"
8878 8870 "JMP,s done\n"
8879 8871 "pos:\n\t"
8880 8872 "DIV $tmp\n\t"
8881 8873 "XCHG EAX,$tmp2\n"
8882 8874 "fast:\n\t"
8883 8875 "DIV $tmp\n"
8884 8876 "done:\n\t"
8885 8877 "MOV EDX,$tmp2\n\t"
8886 8878 "NEG EDX:EAX # if $imm < 0" %}
8887 8879 ins_encode %{
8888 8880 int con = (int)$imm$$constant;
8889 8881 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8890 8882 int pcon = (con > 0) ? con : -con;
8891 8883 Label Lfast, Lpos, Ldone;
8892 8884
8893 8885 __ movl($tmp$$Register, pcon);
8894 8886 __ xorl($tmp2$$Register,$tmp2$$Register);
8895 8887 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8896 8888 __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8897 8889
8898 8890 __ movl($tmp2$$Register, $dst$$Register); // save
8899 8891 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8900 8892 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8901 8893 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8902 8894
8903 8895 // Negative dividend.
8904 8896 // convert value to positive to use unsigned division
8905 8897 __ lneg($dst$$Register, $tmp2$$Register);
8906 8898 __ divl($tmp$$Register);
8907 8899 __ xchgl($dst$$Register, $tmp2$$Register);
8908 8900 __ divl($tmp$$Register);
8909 8901 // revert result back to negative
8910 8902 __ lneg($tmp2$$Register, $dst$$Register);
8911 8903 __ jmpb(Ldone);
8912 8904
8913 8905 __ bind(Lpos);
8914 8906 __ divl($tmp$$Register); // Use unsigned division
8915 8907 __ xchgl($dst$$Register, $tmp2$$Register);
8916 8908 // Fallthrow for final divide, tmp2 has 32 bit hi result
8917 8909
8918 8910 __ bind(Lfast);
8919 8911 // fast path: src is positive
8920 8912 __ divl($tmp$$Register); // Use unsigned division
8921 8913
8922 8914 __ bind(Ldone);
8923 8915 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8924 8916 if (con < 0) {
8925 8917 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8926 8918 }
8927 8919 %}
8928 8920 ins_pipe( pipe_slow );
8929 8921 %}
8930 8922
8931 8923 // Remainder Register Long (remainder fit into 32 bits)
8932 8924 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8933 8925 match(Set dst (ModL dst imm));
8934 8926 effect( TEMP tmp, TEMP tmp2, KILL cr );
8935 8927 ins_cost(1000);
8936 8928 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8937 8929 "CMP $tmp,EDX\n\t"
8938 8930 "JA,s fast\n\t"
8939 8931 "MOV $tmp2,EAX\n\t"
8940 8932 "MOV EAX,EDX\n\t"
8941 8933 "MOV EDX,0\n\t"
8942 8934 "JLE,s pos\n\t"
8943 8935 "LNEG EAX : $tmp2\n\t"
8944 8936 "DIV $tmp # unsigned division\n\t"
8945 8937 "MOV EAX,$tmp2\n\t"
8946 8938 "DIV $tmp\n\t"
8947 8939 "NEG EDX\n\t"
8948 8940 "JMP,s done\n"
8949 8941 "pos:\n\t"
8950 8942 "DIV $tmp\n\t"
8951 8943 "MOV EAX,$tmp2\n"
8952 8944 "fast:\n\t"
8953 8945 "DIV $tmp\n"
8954 8946 "done:\n\t"
8955 8947 "MOV EAX,EDX\n\t"
8956 8948 "SAR EDX,31\n\t" %}
8957 8949 ins_encode %{
8958 8950 int con = (int)$imm$$constant;
8959 8951 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8960 8952 int pcon = (con > 0) ? con : -con;
8961 8953 Label Lfast, Lpos, Ldone;
8962 8954
8963 8955 __ movl($tmp$$Register, pcon);
8964 8956 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8965 8957 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8966 8958
8967 8959 __ movl($tmp2$$Register, $dst$$Register); // save
8968 8960 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8969 8961 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8970 8962 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8971 8963
8972 8964 // Negative dividend.
8973 8965 // convert value to positive to use unsigned division
8974 8966 __ lneg($dst$$Register, $tmp2$$Register);
8975 8967 __ divl($tmp$$Register);
8976 8968 __ movl($dst$$Register, $tmp2$$Register);
8977 8969 __ divl($tmp$$Register);
8978 8970 // revert remainder back to negative
8979 8971 __ negl(HIGH_FROM_LOW($dst$$Register));
8980 8972 __ jmpb(Ldone);
8981 8973
8982 8974 __ bind(Lpos);
8983 8975 __ divl($tmp$$Register);
8984 8976 __ movl($dst$$Register, $tmp2$$Register);
8985 8977
8986 8978 __ bind(Lfast);
8987 8979 // fast path: src is positive
8988 8980 __ divl($tmp$$Register);
8989 8981
8990 8982 __ bind(Ldone);
8991 8983 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8992 8984 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8993 8985
8994 8986 %}
8995 8987 ins_pipe( pipe_slow );
8996 8988 %}
8997 8989
8998 8990 // Integer Shift Instructions
8999 8991 // Shift Left by one
9000 8992 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9001 8993 match(Set dst (LShiftI dst shift));
9002 8994 effect(KILL cr);
9003 8995
9004 8996 size(2);
9005 8997 format %{ "SHL $dst,$shift" %}
9006 8998 opcode(0xD1, 0x4); /* D1 /4 */
9007 8999 ins_encode( OpcP, RegOpc( dst ) );
9008 9000 ins_pipe( ialu_reg );
9009 9001 %}
9010 9002
9011 9003 // Shift Left by 8-bit immediate
9012 9004 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9013 9005 match(Set dst (LShiftI dst shift));
9014 9006 effect(KILL cr);
9015 9007
9016 9008 size(3);
9017 9009 format %{ "SHL $dst,$shift" %}
9018 9010 opcode(0xC1, 0x4); /* C1 /4 ib */
9019 9011 ins_encode( RegOpcImm( dst, shift) );
9020 9012 ins_pipe( ialu_reg );
9021 9013 %}
9022 9014
9023 9015 // Shift Left by variable
9024 9016 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9025 9017 match(Set dst (LShiftI dst shift));
9026 9018 effect(KILL cr);
9027 9019
9028 9020 size(2);
9029 9021 format %{ "SHL $dst,$shift" %}
9030 9022 opcode(0xD3, 0x4); /* D3 /4 */
9031 9023 ins_encode( OpcP, RegOpc( dst ) );
9032 9024 ins_pipe( ialu_reg_reg );
9033 9025 %}
9034 9026
9035 9027 // Arithmetic shift right by one
9036 9028 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9037 9029 match(Set dst (RShiftI dst shift));
9038 9030 effect(KILL cr);
9039 9031
9040 9032 size(2);
9041 9033 format %{ "SAR $dst,$shift" %}
9042 9034 opcode(0xD1, 0x7); /* D1 /7 */
9043 9035 ins_encode( OpcP, RegOpc( dst ) );
9044 9036 ins_pipe( ialu_reg );
9045 9037 %}
9046 9038
9047 9039 // Arithmetic shift right by one
9048 9040 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
9049 9041 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9050 9042 effect(KILL cr);
9051 9043 format %{ "SAR $dst,$shift" %}
9052 9044 opcode(0xD1, 0x7); /* D1 /7 */
9053 9045 ins_encode( OpcP, RMopc_Mem(secondary,dst) );
9054 9046 ins_pipe( ialu_mem_imm );
9055 9047 %}
9056 9048
9057 9049 // Arithmetic Shift Right by 8-bit immediate
9058 9050 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9059 9051 match(Set dst (RShiftI dst shift));
9060 9052 effect(KILL cr);
9061 9053
9062 9054 size(3);
9063 9055 format %{ "SAR $dst,$shift" %}
9064 9056 opcode(0xC1, 0x7); /* C1 /7 ib */
9065 9057 ins_encode( RegOpcImm( dst, shift ) );
9066 9058 ins_pipe( ialu_mem_imm );
9067 9059 %}
9068 9060
9069 9061 // Arithmetic Shift Right by 8-bit immediate
9070 9062 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
9071 9063 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9072 9064 effect(KILL cr);
9073 9065
9074 9066 format %{ "SAR $dst,$shift" %}
9075 9067 opcode(0xC1, 0x7); /* C1 /7 ib */
9076 9068 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
9077 9069 ins_pipe( ialu_mem_imm );
9078 9070 %}
9079 9071
9080 9072 // Arithmetic Shift Right by variable
9081 9073 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9082 9074 match(Set dst (RShiftI dst shift));
9083 9075 effect(KILL cr);
9084 9076
9085 9077 size(2);
9086 9078 format %{ "SAR $dst,$shift" %}
9087 9079 opcode(0xD3, 0x7); /* D3 /7 */
9088 9080 ins_encode( OpcP, RegOpc( dst ) );
9089 9081 ins_pipe( ialu_reg_reg );
9090 9082 %}
9091 9083
9092 9084 // Logical shift right by one
9093 9085 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9094 9086 match(Set dst (URShiftI dst shift));
9095 9087 effect(KILL cr);
9096 9088
9097 9089 size(2);
9098 9090 format %{ "SHR $dst,$shift" %}
9099 9091 opcode(0xD1, 0x5); /* D1 /5 */
9100 9092 ins_encode( OpcP, RegOpc( dst ) );
9101 9093 ins_pipe( ialu_reg );
9102 9094 %}
9103 9095
9104 9096 // Logical Shift Right by 8-bit immediate
9105 9097 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9106 9098 match(Set dst (URShiftI dst shift));
9107 9099 effect(KILL cr);
9108 9100
9109 9101 size(3);
9110 9102 format %{ "SHR $dst,$shift" %}
9111 9103 opcode(0xC1, 0x5); /* C1 /5 ib */
9112 9104 ins_encode( RegOpcImm( dst, shift) );
9113 9105 ins_pipe( ialu_reg );
9114 9106 %}
9115 9107
9116 9108
9117 9109 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
9118 9110 // This idiom is used by the compiler for the i2b bytecode.
9119 9111 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
9120 9112 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9121 9113
9122 9114 size(3);
9123 9115 format %{ "MOVSX $dst,$src :8" %}
9124 9116 ins_encode %{
9125 9117 __ movsbl($dst$$Register, $src$$Register);
9126 9118 %}
9127 9119 ins_pipe(ialu_reg_reg);
9128 9120 %}
9129 9121
9130 9122 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
9131 9123 // This idiom is used by the compiler the i2s bytecode.
9132 9124 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
9133 9125 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9134 9126
9135 9127 size(3);
9136 9128 format %{ "MOVSX $dst,$src :16" %}
9137 9129 ins_encode %{
9138 9130 __ movswl($dst$$Register, $src$$Register);
9139 9131 %}
9140 9132 ins_pipe(ialu_reg_reg);
9141 9133 %}
9142 9134
9143 9135
9144 9136 // Logical Shift Right by variable
9145 9137 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9146 9138 match(Set dst (URShiftI dst shift));
9147 9139 effect(KILL cr);
9148 9140
9149 9141 size(2);
9150 9142 format %{ "SHR $dst,$shift" %}
9151 9143 opcode(0xD3, 0x5); /* D3 /5 */
9152 9144 ins_encode( OpcP, RegOpc( dst ) );
9153 9145 ins_pipe( ialu_reg_reg );
9154 9146 %}
9155 9147
9156 9148
9157 9149 //----------Logical Instructions-----------------------------------------------
9158 9150 //----------Integer Logical Instructions---------------------------------------
9159 9151 // And Instructions
9160 9152 // And Register with Register
9161 9153 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9162 9154 match(Set dst (AndI dst src));
9163 9155 effect(KILL cr);
9164 9156
9165 9157 size(2);
9166 9158 format %{ "AND $dst,$src" %}
9167 9159 opcode(0x23);
9168 9160 ins_encode( OpcP, RegReg( dst, src) );
9169 9161 ins_pipe( ialu_reg_reg );
9170 9162 %}
9171 9163
9172 9164 // And Register with Immediate
9173 9165 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9174 9166 match(Set dst (AndI dst src));
9175 9167 effect(KILL cr);
9176 9168
9177 9169 format %{ "AND $dst,$src" %}
9178 9170 opcode(0x81,0x04); /* Opcode 81 /4 */
9179 9171 // ins_encode( RegImm( dst, src) );
9180 9172 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9181 9173 ins_pipe( ialu_reg );
9182 9174 %}
9183 9175
9184 9176 // And Register with Memory
9185 9177 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9186 9178 match(Set dst (AndI dst (LoadI src)));
9187 9179 effect(KILL cr);
9188 9180
9189 9181 ins_cost(125);
9190 9182 format %{ "AND $dst,$src" %}
9191 9183 opcode(0x23);
9192 9184 ins_encode( OpcP, RegMem( dst, src) );
9193 9185 ins_pipe( ialu_reg_mem );
9194 9186 %}
9195 9187
9196 9188 // And Memory with Register
9197 9189 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9198 9190 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9199 9191 effect(KILL cr);
9200 9192
9201 9193 ins_cost(150);
9202 9194 format %{ "AND $dst,$src" %}
9203 9195 opcode(0x21); /* Opcode 21 /r */
9204 9196 ins_encode( OpcP, RegMem( src, dst ) );
9205 9197 ins_pipe( ialu_mem_reg );
9206 9198 %}
9207 9199
9208 9200 // And Memory with Immediate
9209 9201 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9210 9202 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9211 9203 effect(KILL cr);
9212 9204
9213 9205 ins_cost(125);
9214 9206 format %{ "AND $dst,$src" %}
9215 9207 opcode(0x81, 0x4); /* Opcode 81 /4 id */
9216 9208 // ins_encode( MemImm( dst, src) );
9217 9209 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9218 9210 ins_pipe( ialu_mem_imm );
9219 9211 %}
9220 9212
9221 9213 // Or Instructions
9222 9214 // Or Register with Register
9223 9215 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9224 9216 match(Set dst (OrI dst src));
9225 9217 effect(KILL cr);
9226 9218
9227 9219 size(2);
9228 9220 format %{ "OR $dst,$src" %}
9229 9221 opcode(0x0B);
9230 9222 ins_encode( OpcP, RegReg( dst, src) );
9231 9223 ins_pipe( ialu_reg_reg );
9232 9224 %}
9233 9225
9234 9226 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
9235 9227 match(Set dst (OrI dst (CastP2X src)));
9236 9228 effect(KILL cr);
9237 9229
9238 9230 size(2);
9239 9231 format %{ "OR $dst,$src" %}
9240 9232 opcode(0x0B);
9241 9233 ins_encode( OpcP, RegReg( dst, src) );
9242 9234 ins_pipe( ialu_reg_reg );
9243 9235 %}
9244 9236
9245 9237
9246 9238 // Or Register with Immediate
9247 9239 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9248 9240 match(Set dst (OrI dst src));
9249 9241 effect(KILL cr);
9250 9242
9251 9243 format %{ "OR $dst,$src" %}
9252 9244 opcode(0x81,0x01); /* Opcode 81 /1 id */
9253 9245 // ins_encode( RegImm( dst, src) );
9254 9246 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9255 9247 ins_pipe( ialu_reg );
9256 9248 %}
9257 9249
9258 9250 // Or Register with Memory
9259 9251 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9260 9252 match(Set dst (OrI dst (LoadI src)));
9261 9253 effect(KILL cr);
9262 9254
9263 9255 ins_cost(125);
9264 9256 format %{ "OR $dst,$src" %}
9265 9257 opcode(0x0B);
9266 9258 ins_encode( OpcP, RegMem( dst, src) );
9267 9259 ins_pipe( ialu_reg_mem );
9268 9260 %}
9269 9261
9270 9262 // Or Memory with Register
9271 9263 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9272 9264 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9273 9265 effect(KILL cr);
9274 9266
9275 9267 ins_cost(150);
9276 9268 format %{ "OR $dst,$src" %}
9277 9269 opcode(0x09); /* Opcode 09 /r */
9278 9270 ins_encode( OpcP, RegMem( src, dst ) );
9279 9271 ins_pipe( ialu_mem_reg );
9280 9272 %}
9281 9273
9282 9274 // Or Memory with Immediate
9283 9275 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9284 9276 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9285 9277 effect(KILL cr);
9286 9278
9287 9279 ins_cost(125);
9288 9280 format %{ "OR $dst,$src" %}
9289 9281 opcode(0x81,0x1); /* Opcode 81 /1 id */
9290 9282 // ins_encode( MemImm( dst, src) );
9291 9283 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9292 9284 ins_pipe( ialu_mem_imm );
9293 9285 %}
9294 9286
9295 9287 // ROL/ROR
9296 9288 // ROL expand
9297 9289 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9298 9290 effect(USE_DEF dst, USE shift, KILL cr);
9299 9291
9300 9292 format %{ "ROL $dst, $shift" %}
9301 9293 opcode(0xD1, 0x0); /* Opcode D1 /0 */
9302 9294 ins_encode( OpcP, RegOpc( dst ));
9303 9295 ins_pipe( ialu_reg );
9304 9296 %}
9305 9297
9306 9298 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9307 9299 effect(USE_DEF dst, USE shift, KILL cr);
9308 9300
9309 9301 format %{ "ROL $dst, $shift" %}
9310 9302 opcode(0xC1, 0x0); /*Opcode /C1 /0 */
9311 9303 ins_encode( RegOpcImm(dst, shift) );
9312 9304 ins_pipe(ialu_reg);
9313 9305 %}
9314 9306
9315 9307 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
9316 9308 effect(USE_DEF dst, USE shift, KILL cr);
9317 9309
9318 9310 format %{ "ROL $dst, $shift" %}
9319 9311 opcode(0xD3, 0x0); /* Opcode D3 /0 */
9320 9312 ins_encode(OpcP, RegOpc(dst));
9321 9313 ins_pipe( ialu_reg_reg );
9322 9314 %}
9323 9315 // end of ROL expand
9324 9316
9325 9317 // ROL 32bit by one once
9326 9318 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
9327 9319 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9328 9320
9329 9321 expand %{
9330 9322 rolI_eReg_imm1(dst, lshift, cr);
9331 9323 %}
9332 9324 %}
9333 9325
9334 9326 // ROL 32bit var by imm8 once
9335 9327 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
9336 9328 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9337 9329 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9338 9330
9339 9331 expand %{
9340 9332 rolI_eReg_imm8(dst, lshift, cr);
9341 9333 %}
9342 9334 %}
9343 9335
9344 9336 // ROL 32bit var by var once
9345 9337 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9346 9338 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9347 9339
9348 9340 expand %{
9349 9341 rolI_eReg_CL(dst, shift, cr);
9350 9342 %}
9351 9343 %}
9352 9344
9353 9345 // ROL 32bit var by var once
9354 9346 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9355 9347 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9356 9348
9357 9349 expand %{
9358 9350 rolI_eReg_CL(dst, shift, cr);
9359 9351 %}
9360 9352 %}
9361 9353
9362 9354 // ROR expand
9363 9355 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9364 9356 effect(USE_DEF dst, USE shift, KILL cr);
9365 9357
9366 9358 format %{ "ROR $dst, $shift" %}
9367 9359 opcode(0xD1,0x1); /* Opcode D1 /1 */
9368 9360 ins_encode( OpcP, RegOpc( dst ) );
9369 9361 ins_pipe( ialu_reg );
9370 9362 %}
9371 9363
9372 9364 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9373 9365 effect (USE_DEF dst, USE shift, KILL cr);
9374 9366
9375 9367 format %{ "ROR $dst, $shift" %}
9376 9368 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
9377 9369 ins_encode( RegOpcImm(dst, shift) );
9378 9370 ins_pipe( ialu_reg );
9379 9371 %}
9380 9372
9381 9373 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
9382 9374 effect(USE_DEF dst, USE shift, KILL cr);
9383 9375
9384 9376 format %{ "ROR $dst, $shift" %}
9385 9377 opcode(0xD3, 0x1); /* Opcode D3 /1 */
9386 9378 ins_encode(OpcP, RegOpc(dst));
9387 9379 ins_pipe( ialu_reg_reg );
9388 9380 %}
9389 9381 // end of ROR expand
9390 9382
9391 9383 // ROR right once
9392 9384 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
9393 9385 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9394 9386
9395 9387 expand %{
9396 9388 rorI_eReg_imm1(dst, rshift, cr);
9397 9389 %}
9398 9390 %}
9399 9391
9400 9392 // ROR 32bit by immI8 once
9401 9393 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9402 9394 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9403 9395 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9404 9396
9405 9397 expand %{
9406 9398 rorI_eReg_imm8(dst, rshift, cr);
9407 9399 %}
9408 9400 %}
9409 9401
9410 9402 // ROR 32bit var by var once
9411 9403 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9412 9404 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9413 9405
9414 9406 expand %{
9415 9407 rorI_eReg_CL(dst, shift, cr);
9416 9408 %}
9417 9409 %}
9418 9410
9419 9411 // ROR 32bit var by var once
9420 9412 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9421 9413 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9422 9414
9423 9415 expand %{
9424 9416 rorI_eReg_CL(dst, shift, cr);
9425 9417 %}
9426 9418 %}
9427 9419
9428 9420 // Xor Instructions
9429 9421 // Xor Register with Register
9430 9422 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9431 9423 match(Set dst (XorI dst src));
9432 9424 effect(KILL cr);
9433 9425
9434 9426 size(2);
9435 9427 format %{ "XOR $dst,$src" %}
9436 9428 opcode(0x33);
9437 9429 ins_encode( OpcP, RegReg( dst, src) );
9438 9430 ins_pipe( ialu_reg_reg );
9439 9431 %}
9440 9432
9441 9433 // Xor Register with Immediate -1
9442 9434 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9443 9435 match(Set dst (XorI dst imm));
9444 9436
9445 9437 size(2);
9446 9438 format %{ "NOT $dst" %}
9447 9439 ins_encode %{
9448 9440 __ notl($dst$$Register);
9449 9441 %}
9450 9442 ins_pipe( ialu_reg );
9451 9443 %}
9452 9444
9453 9445 // Xor Register with Immediate
9454 9446 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9455 9447 match(Set dst (XorI dst src));
9456 9448 effect(KILL cr);
9457 9449
9458 9450 format %{ "XOR $dst,$src" %}
9459 9451 opcode(0x81,0x06); /* Opcode 81 /6 id */
9460 9452 // ins_encode( RegImm( dst, src) );
9461 9453 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9462 9454 ins_pipe( ialu_reg );
9463 9455 %}
9464 9456
9465 9457 // Xor Register with Memory
9466 9458 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9467 9459 match(Set dst (XorI dst (LoadI src)));
9468 9460 effect(KILL cr);
9469 9461
9470 9462 ins_cost(125);
9471 9463 format %{ "XOR $dst,$src" %}
9472 9464 opcode(0x33);
9473 9465 ins_encode( OpcP, RegMem(dst, src) );
9474 9466 ins_pipe( ialu_reg_mem );
9475 9467 %}
9476 9468
9477 9469 // Xor Memory with Register
9478 9470 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9479 9471 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9480 9472 effect(KILL cr);
9481 9473
9482 9474 ins_cost(150);
9483 9475 format %{ "XOR $dst,$src" %}
9484 9476 opcode(0x31); /* Opcode 31 /r */
9485 9477 ins_encode( OpcP, RegMem( src, dst ) );
9486 9478 ins_pipe( ialu_mem_reg );
9487 9479 %}
9488 9480
9489 9481 // Xor Memory with Immediate
9490 9482 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9491 9483 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9492 9484 effect(KILL cr);
9493 9485
9494 9486 ins_cost(125);
9495 9487 format %{ "XOR $dst,$src" %}
9496 9488 opcode(0x81,0x6); /* Opcode 81 /6 id */
9497 9489 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9498 9490 ins_pipe( ialu_mem_imm );
9499 9491 %}
9500 9492
9501 9493 //----------Convert Int to Boolean---------------------------------------------
9502 9494
9503 9495 instruct movI_nocopy(eRegI dst, eRegI src) %{
9504 9496 effect( DEF dst, USE src );
9505 9497 format %{ "MOV $dst,$src" %}
9506 9498 ins_encode( enc_Copy( dst, src) );
9507 9499 ins_pipe( ialu_reg_reg );
9508 9500 %}
9509 9501
9510 9502 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9511 9503 effect( USE_DEF dst, USE src, KILL cr );
9512 9504
9513 9505 size(4);
9514 9506 format %{ "NEG $dst\n\t"
9515 9507 "ADC $dst,$src" %}
9516 9508 ins_encode( neg_reg(dst),
9517 9509 OpcRegReg(0x13,dst,src) );
9518 9510 ins_pipe( ialu_reg_reg_long );
9519 9511 %}
9520 9512
9521 9513 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9522 9514 match(Set dst (Conv2B src));
9523 9515
9524 9516 expand %{
9525 9517 movI_nocopy(dst,src);
9526 9518 ci2b(dst,src,cr);
9527 9519 %}
9528 9520 %}
9529 9521
9530 9522 instruct movP_nocopy(eRegI dst, eRegP src) %{
9531 9523 effect( DEF dst, USE src );
9532 9524 format %{ "MOV $dst,$src" %}
9533 9525 ins_encode( enc_Copy( dst, src) );
9534 9526 ins_pipe( ialu_reg_reg );
9535 9527 %}
9536 9528
9537 9529 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9538 9530 effect( USE_DEF dst, USE src, KILL cr );
9539 9531 format %{ "NEG $dst\n\t"
9540 9532 "ADC $dst,$src" %}
9541 9533 ins_encode( neg_reg(dst),
9542 9534 OpcRegReg(0x13,dst,src) );
9543 9535 ins_pipe( ialu_reg_reg_long );
9544 9536 %}
9545 9537
9546 9538 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9547 9539 match(Set dst (Conv2B src));
9548 9540
9549 9541 expand %{
9550 9542 movP_nocopy(dst,src);
9551 9543 cp2b(dst,src,cr);
9552 9544 %}
9553 9545 %}
9554 9546
9555 9547 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9556 9548 match(Set dst (CmpLTMask p q));
9557 9549 effect( KILL cr );
9558 9550 ins_cost(400);
9559 9551
9560 9552 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9561 9553 format %{ "XOR $dst,$dst\n\t"
9562 9554 "CMP $p,$q\n\t"
9563 9555 "SETlt $dst\n\t"
9564 9556 "NEG $dst" %}
9565 9557 ins_encode( OpcRegReg(0x33,dst,dst),
9566 9558 OpcRegReg(0x3B,p,q),
9567 9559 setLT_reg(dst), neg_reg(dst) );
9568 9560 ins_pipe( pipe_slow );
9569 9561 %}
9570 9562
9571 9563 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9572 9564 match(Set dst (CmpLTMask dst zero));
9573 9565 effect( DEF dst, KILL cr );
9574 9566 ins_cost(100);
9575 9567
9576 9568 format %{ "SAR $dst,31" %}
9577 9569 opcode(0xC1, 0x7); /* C1 /7 ib */
9578 9570 ins_encode( RegOpcImm( dst, 0x1F ) );
9579 9571 ins_pipe( ialu_reg );
9580 9572 %}
9581 9573
9582 9574
9583 9575 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9584 9576 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9585 9577 effect( KILL tmp, KILL cr );
9586 9578 ins_cost(400);
9587 9579 // annoyingly, $tmp has no edges so you cant ask for it in
9588 9580 // any format or encoding
9589 9581 format %{ "SUB $p,$q\n\t"
9590 9582 "SBB ECX,ECX\n\t"
9591 9583 "AND ECX,$y\n\t"
9592 9584 "ADD $p,ECX" %}
9593 9585 ins_encode( enc_cmpLTP(p,q,y,tmp) );
9594 9586 ins_pipe( pipe_cmplt );
9595 9587 %}
9596 9588
9597 9589 /* If I enable this, I encourage spilling in the inner loop of compress.
9598 9590 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9599 9591 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9600 9592 effect( USE_KILL tmp, KILL cr );
9601 9593 ins_cost(400);
9602 9594
9603 9595 format %{ "SUB $p,$q\n\t"
9604 9596 "SBB ECX,ECX\n\t"
9605 9597 "AND ECX,$y\n\t"
9606 9598 "ADD $p,ECX" %}
9607 9599 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9608 9600 %}
9609 9601 */
9610 9602
9611 9603 //----------Long Instructions------------------------------------------------
9612 9604 // Add Long Register with Register
9613 9605 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9614 9606 match(Set dst (AddL dst src));
9615 9607 effect(KILL cr);
9616 9608 ins_cost(200);
9617 9609 format %{ "ADD $dst.lo,$src.lo\n\t"
9618 9610 "ADC $dst.hi,$src.hi" %}
9619 9611 opcode(0x03, 0x13);
9620 9612 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9621 9613 ins_pipe( ialu_reg_reg_long );
9622 9614 %}
9623 9615
9624 9616 // Add Long Register with Immediate
9625 9617 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9626 9618 match(Set dst (AddL dst src));
9627 9619 effect(KILL cr);
9628 9620 format %{ "ADD $dst.lo,$src.lo\n\t"
9629 9621 "ADC $dst.hi,$src.hi" %}
9630 9622 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9631 9623 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9632 9624 ins_pipe( ialu_reg_long );
9633 9625 %}
9634 9626
9635 9627 // Add Long Register with Memory
9636 9628 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9637 9629 match(Set dst (AddL dst (LoadL mem)));
9638 9630 effect(KILL cr);
9639 9631 ins_cost(125);
9640 9632 format %{ "ADD $dst.lo,$mem\n\t"
9641 9633 "ADC $dst.hi,$mem+4" %}
9642 9634 opcode(0x03, 0x13);
9643 9635 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9644 9636 ins_pipe( ialu_reg_long_mem );
9645 9637 %}
9646 9638
9647 9639 // Subtract Long Register with Register.
9648 9640 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9649 9641 match(Set dst (SubL dst src));
9650 9642 effect(KILL cr);
9651 9643 ins_cost(200);
9652 9644 format %{ "SUB $dst.lo,$src.lo\n\t"
9653 9645 "SBB $dst.hi,$src.hi" %}
9654 9646 opcode(0x2B, 0x1B);
9655 9647 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9656 9648 ins_pipe( ialu_reg_reg_long );
9657 9649 %}
9658 9650
9659 9651 // Subtract Long Register with Immediate
9660 9652 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9661 9653 match(Set dst (SubL dst src));
9662 9654 effect(KILL cr);
9663 9655 format %{ "SUB $dst.lo,$src.lo\n\t"
9664 9656 "SBB $dst.hi,$src.hi" %}
9665 9657 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9666 9658 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9667 9659 ins_pipe( ialu_reg_long );
9668 9660 %}
9669 9661
9670 9662 // Subtract Long Register with Memory
9671 9663 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9672 9664 match(Set dst (SubL dst (LoadL mem)));
9673 9665 effect(KILL cr);
9674 9666 ins_cost(125);
9675 9667 format %{ "SUB $dst.lo,$mem\n\t"
9676 9668 "SBB $dst.hi,$mem+4" %}
9677 9669 opcode(0x2B, 0x1B);
9678 9670 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9679 9671 ins_pipe( ialu_reg_long_mem );
9680 9672 %}
9681 9673
9682 9674 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9683 9675 match(Set dst (SubL zero dst));
9684 9676 effect(KILL cr);
9685 9677 ins_cost(300);
9686 9678 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9687 9679 ins_encode( neg_long(dst) );
9688 9680 ins_pipe( ialu_reg_reg_long );
9689 9681 %}
9690 9682
9691 9683 // And Long Register with Register
9692 9684 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9693 9685 match(Set dst (AndL dst src));
9694 9686 effect(KILL cr);
9695 9687 format %{ "AND $dst.lo,$src.lo\n\t"
9696 9688 "AND $dst.hi,$src.hi" %}
9697 9689 opcode(0x23,0x23);
9698 9690 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9699 9691 ins_pipe( ialu_reg_reg_long );
9700 9692 %}
9701 9693
9702 9694 // And Long Register with Immediate
9703 9695 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9704 9696 match(Set dst (AndL dst src));
9705 9697 effect(KILL cr);
9706 9698 format %{ "AND $dst.lo,$src.lo\n\t"
9707 9699 "AND $dst.hi,$src.hi" %}
9708 9700 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9709 9701 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9710 9702 ins_pipe( ialu_reg_long );
9711 9703 %}
9712 9704
9713 9705 // And Long Register with Memory
9714 9706 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9715 9707 match(Set dst (AndL dst (LoadL mem)));
9716 9708 effect(KILL cr);
9717 9709 ins_cost(125);
9718 9710 format %{ "AND $dst.lo,$mem\n\t"
9719 9711 "AND $dst.hi,$mem+4" %}
9720 9712 opcode(0x23, 0x23);
9721 9713 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9722 9714 ins_pipe( ialu_reg_long_mem );
9723 9715 %}
9724 9716
9725 9717 // Or Long Register with Register
9726 9718 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9727 9719 match(Set dst (OrL dst src));
9728 9720 effect(KILL cr);
9729 9721 format %{ "OR $dst.lo,$src.lo\n\t"
9730 9722 "OR $dst.hi,$src.hi" %}
9731 9723 opcode(0x0B,0x0B);
9732 9724 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9733 9725 ins_pipe( ialu_reg_reg_long );
9734 9726 %}
9735 9727
9736 9728 // Or Long Register with Immediate
9737 9729 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9738 9730 match(Set dst (OrL dst src));
9739 9731 effect(KILL cr);
9740 9732 format %{ "OR $dst.lo,$src.lo\n\t"
9741 9733 "OR $dst.hi,$src.hi" %}
9742 9734 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9743 9735 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9744 9736 ins_pipe( ialu_reg_long );
9745 9737 %}
9746 9738
9747 9739 // Or Long Register with Memory
9748 9740 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9749 9741 match(Set dst (OrL dst (LoadL mem)));
9750 9742 effect(KILL cr);
9751 9743 ins_cost(125);
9752 9744 format %{ "OR $dst.lo,$mem\n\t"
9753 9745 "OR $dst.hi,$mem+4" %}
9754 9746 opcode(0x0B,0x0B);
9755 9747 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9756 9748 ins_pipe( ialu_reg_long_mem );
9757 9749 %}
9758 9750
9759 9751 // Xor Long Register with Register
9760 9752 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9761 9753 match(Set dst (XorL dst src));
9762 9754 effect(KILL cr);
9763 9755 format %{ "XOR $dst.lo,$src.lo\n\t"
9764 9756 "XOR $dst.hi,$src.hi" %}
9765 9757 opcode(0x33,0x33);
9766 9758 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9767 9759 ins_pipe( ialu_reg_reg_long );
9768 9760 %}
9769 9761
9770 9762 // Xor Long Register with Immediate -1
9771 9763 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9772 9764 match(Set dst (XorL dst imm));
9773 9765 format %{ "NOT $dst.lo\n\t"
9774 9766 "NOT $dst.hi" %}
9775 9767 ins_encode %{
9776 9768 __ notl($dst$$Register);
9777 9769 __ notl(HIGH_FROM_LOW($dst$$Register));
9778 9770 %}
9779 9771 ins_pipe( ialu_reg_long );
9780 9772 %}
9781 9773
9782 9774 // Xor Long Register with Immediate
9783 9775 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9784 9776 match(Set dst (XorL dst src));
9785 9777 effect(KILL cr);
9786 9778 format %{ "XOR $dst.lo,$src.lo\n\t"
9787 9779 "XOR $dst.hi,$src.hi" %}
9788 9780 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9789 9781 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9790 9782 ins_pipe( ialu_reg_long );
9791 9783 %}
9792 9784
9793 9785 // Xor Long Register with Memory
9794 9786 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9795 9787 match(Set dst (XorL dst (LoadL mem)));
9796 9788 effect(KILL cr);
9797 9789 ins_cost(125);
9798 9790 format %{ "XOR $dst.lo,$mem\n\t"
9799 9791 "XOR $dst.hi,$mem+4" %}
9800 9792 opcode(0x33,0x33);
9801 9793 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9802 9794 ins_pipe( ialu_reg_long_mem );
9803 9795 %}
9804 9796
// Shift Left Long by 1
// With UseNewLongLShift, a 64-bit left shift by one constant bit is
// expanded as an ADD/ADC pair: dst.lo doubles itself and the carry is
// propagated into dst.hi.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Same ADD/ADC doubling applied twice.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Same ADD/ADC doubling applied three times.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9861 9853
// Shift Left Long by 1-31
// SHLD funnels the top bits of $dst.lo into $dst.hi, then the low word is
// shifted; a single 64-bit shift without a temp register.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Shifting by >= 32: the low word moves wholesale into the high word,
// is shifted by the remaining $cnt-32 bits, and the low word is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime count in CL: tests bit 5 of the count (shift >= 32) and falls
// into the SHLD/SHL pair, pre-moving lo->hi for the big-shift case.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9902 9894
// Shift Right Long by 1-31
// Logical (unsigned) right shift: SHRD funnels low bits of $dst.hi into
// $dst.lo, then the high word is shifted with zero fill.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Shift >= 32: high word moves into the low word, shifts by $cnt-32, and
// the high word is zeroed (logical shift).
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Runtime count in CL; same big/small split as salL_eReg_CL but shifting
// right with zero fill.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9943 9935
// Shift Right Long by 1-31
// Arithmetic (signed) right shift: like shrL_eReg_1_31 but the high word
// uses SAR so the sign bit is replicated.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Shift >= 32: high word moves into the low word and is shifted by
// $cnt-32; "SAR $dst.hi,31" smears the sign across the whole high word.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Runtime count in CL; big-shift path sign-fills the high word instead of
// zeroing it (contrast shrL_eReg_CL).
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9984 9976

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes EFLAGS directly; cmpF_P6_fixup converts the unordered
// (NaN) result into a consistent flag state via AH/SAHF.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// CF-only consumer variant: no NaN fixup needed, so EAX is not clobbered.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: FPU status word is pulled through AX (FNSTSW/SAHF), which
// is why EAX is killed; unordered results are treated as "less than".
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10069 10061
// float compare and set condition codes in EFLAGS by XMM regs
// COMISD sets PF on an unordered compare; the cmpF_P6_fixup tail folds the
// NaN case into CF (via AH/SAHF), which is why EAX is clobbered here.
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF-only consumer variant: raw COMISD result is enough, no NaN fixup.
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpXD_cc.
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form of cmpXD_ccCF.
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way compare (CmpD3): NaN and "less than" both yield -1 via the
// shared nan:/inc: tail emitted by CmpX_Result.
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISD $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// As cmpXD_reg, but $dst is zeroed with MOV after the compare so the
// freshly-set flags are not destroyed (XOR would clobber them).
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10165 10157

// x87 double subtract: push $src, then reverse-subtract-and-pop into $dst.
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit round-to-memory (RoundDouble), storing the
// result to a stack slot to force 64-bit rounding of the 80-bit x87 value.
instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Cisc variant: subtrahend loaded straight from memory.
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10207 10199
// x87 absolute value: operates in place on the FPU top-of-stack (regDPR1),
// so no operand encoding is needed beyond the FABS opcode.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 absolute value: clear the sign bit by ANDing with a constant mask.
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// x87 negate: FCHS flips the sign of the FPU top-of-stack in place.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 negate: flip the sign bit by XORing with the signflip constant pool.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister,
             ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}
10246 10238
// x87 double add: push $src, add-and-pop into $dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with explicit round-to-memory: stores to a stack slot to squeeze the
// 80-bit x87 intermediate down to a properly rounded 64-bit double.
instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Cisc variant: addend loaded straight from memory.
instruct addD_reg_mem(regD dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write form: load $dst, add $src, store back to the same
// memory location.
instruct addD_mem_reg(memory dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
10305 10297
// Add the constant 1.0: FLD1 materializes it directly, no memory load.
instruct addD_reg_imm1(regD dst, immD1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant, loaded from the nmethod's constant table
// (rev 1839); the predicate excludes 0.0/1.0, which have cheaper forms.
instruct addD_reg_imm(regD dst, immD con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with explicit D-round to a stack slot.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10342 10338
// Add two double precision floating point values in xmm
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a double constant, read from the nmethod's constant table (rev 1839).
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of ADDSD.
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10367 10365
// Sub two double precision floating point values in xmm
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a double constant, read from the nmethod's constant table.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of SUBSD.
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10392 10392
// Mul two double precision floating point values in xmm
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a double constant, read from the nmethod's constant table.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of MULSD.
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10417 10419
// Div two double precision floating point values in xmm
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a double constant, read from the nmethod's constant table.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of DIVSD.
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10443 10447
10444 10448
// x87 double multiply: push $src, multiply-and-pop into $dst.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// ins_cost(1) deliberately undercuts every other MulD rule so this is the
// only multiply selected inside strictfp methods.
instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10483 10487
// Multiply by a general double constant, loaded from the nmethod's constant
// table (rev 1839); 0.0/1.0 are excluded by the predicate (cheaper forms
// exist for those).
instruct mulD_reg_imm(regD dst, immD con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Cisc variant: multiplier loaded straight from memory.
instruct mulD_reg_mem(regD dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_F(src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10524 10529
10525 10530
// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
// Fused multiply-then-reverse-subtract; result likewise lands in src2.
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
10558 10563
10559 10564
// x87 double divide: push $src, divide-and-pop into $dst.
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10572 10577
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX(review): the original clause carried two predicate() statements
  // (a bare "UseSSE<=1" followed by the strict-method test) and an odd
  // "ins_cost(01)".  Only one predicate is meaningful per instruct, so the
  // two are merged into the single strict test, mirroring the sibling
  // strictfp_mulD_reg above; the cost is written as a plain 1.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10600 10605
10601 10606 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10602 10607 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10603 10608 match(Set dst (RoundDouble (DivD src1 src2)));
10604 10609
10605 10610 format %{ "FLD $src1\n\t"
10606 10611 "FDIV ST,$src2\n\t"
10607 10612 "FSTP_D $dst\t# D-round" %}
10608 10613 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10609 10614 ins_encode( Push_Reg_D(src1),
10610 10615 OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10611 10616 ins_pipe( fpu_mem_reg_reg );
10612 10617 %}
10613 10618
10614 10619
10615 10620 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
10616 10621 predicate(UseSSE<=1);
10617 10622 match(Set dst (ModD dst src));
10618 10623 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10619 10624
10620 10625 format %{ "DMOD $dst,$src" %}
10621 10626 ins_cost(250);
10622 10627 ins_encode(Push_Reg_Mod_D(dst, src),
10623 10628 emitModD(),
10624 10629 Push_Result_Mod_D(src),
10625 10630 Pop_Reg_D(dst));
10626 10631 ins_pipe( pipe_slow );
10627 10632 %}
10628 10633
10629 10634 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
10630 10635 predicate(UseSSE>=2);
10631 10636 match(Set dst (ModD src0 src1));
10632 10637 effect(KILL rax, KILL cr);
10633 10638
10634 10639 format %{ "SUB ESP,8\t # DMOD\n"
10635 10640 "\tMOVSD [ESP+0],$src1\n"
10636 10641 "\tFLD_D [ESP+0]\n"
10637 10642 "\tMOVSD [ESP+0],$src0\n"
10638 10643 "\tFLD_D [ESP+0]\n"
10639 10644 "loop:\tFPREM\n"
10640 10645 "\tFWAIT\n"
10641 10646 "\tFNSTSW AX\n"
10642 10647 "\tSAHF\n"
10643 10648 "\tJP loop\n"
10644 10649 "\tFSTP_D [ESP+0]\n"
10645 10650 "\tMOVSD $dst,[ESP+0]\n"
10646 10651 "\tADD ESP,8\n"
10647 10652 "\tFSTP ST0\t # Restore FPU Stack"
10648 10653 %}
10649 10654 ins_cost(250);
10650 10655 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10651 10656 ins_pipe( pipe_slow );
10652 10657 %}
10653 10658
10654 10659 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
10655 10660 predicate (UseSSE<=1);
10656 10661 match(Set dst (SinD src));
10657 10662 ins_cost(1800);
10658 10663 format %{ "DSIN $dst" %}
10659 10664 opcode(0xD9, 0xFE);
10660 10665 ins_encode( OpcP, OpcS );
10661 10666 ins_pipe( pipe_slow );
10662 10667 %}
10663 10668
10664 10669 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
10665 10670 predicate (UseSSE>=2);
10666 10671 match(Set dst (SinD dst));
10667 10672 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10668 10673 ins_cost(1800);
10669 10674 format %{ "DSIN $dst" %}
10670 10675 opcode(0xD9, 0xFE);
10671 10676 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10672 10677 ins_pipe( pipe_slow );
10673 10678 %}
10674 10679
10675 10680 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
10676 10681 predicate (UseSSE<=1);
10677 10682 match(Set dst (CosD src));
10678 10683 ins_cost(1800);
10679 10684 format %{ "DCOS $dst" %}
10680 10685 opcode(0xD9, 0xFF);
10681 10686 ins_encode( OpcP, OpcS );
10682 10687 ins_pipe( pipe_slow );
10683 10688 %}
10684 10689
10685 10690 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
10686 10691 predicate (UseSSE>=2);
10687 10692 match(Set dst (CosD dst));
10688 10693 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10689 10694 ins_cost(1800);
10690 10695 format %{ "DCOS $dst" %}
10691 10696 opcode(0xD9, 0xFF);
10692 10697 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10693 10698 ins_pipe( pipe_slow );
10694 10699 %}
10695 10700
10696 10701 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
10697 10702 predicate (UseSSE<=1);
10698 10703 match(Set dst(TanD src));
10699 10704 format %{ "DTAN $dst" %}
10700 10705 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10701 10706 Opcode(0xDD), Opcode(0xD8)); // fstp st
10702 10707 ins_pipe( pipe_slow );
10703 10708 %}
10704 10709
10705 10710 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
10706 10711 predicate (UseSSE>=2);
10707 10712 match(Set dst(TanD dst));
10708 10713 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10709 10714 format %{ "DTAN $dst" %}
10710 10715 ins_encode( Push_SrcXD(dst),
10711 10716 Opcode(0xD9), Opcode(0xF2), // fptan
10712 10717 Opcode(0xDD), Opcode(0xD8), // fstp st
10713 10718 Push_ResultXD(dst) );
10714 10719 ins_pipe( pipe_slow );
10715 10720 %}
10716 10721
10717 10722 instruct atanD_reg(regD dst, regD src) %{
10718 10723 predicate (UseSSE<=1);
10719 10724 match(Set dst(AtanD dst src));
10720 10725 format %{ "DATA $dst,$src" %}
10721 10726 opcode(0xD9, 0xF3);
10722 10727 ins_encode( Push_Reg_D(src),
10723 10728 OpcP, OpcS, RegOpc(dst) );
10724 10729 ins_pipe( pipe_slow );
10725 10730 %}
10726 10731
10727 10732 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10728 10733 predicate (UseSSE>=2);
10729 10734 match(Set dst(AtanD dst src));
10730 10735 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10731 10736 format %{ "DATA $dst,$src" %}
10732 10737 opcode(0xD9, 0xF3);
10733 10738 ins_encode( Push_SrcXD(src),
10734 10739 OpcP, OpcS, Push_ResultXD(dst) );
10735 10740 ins_pipe( pipe_slow );
10736 10741 %}
10737 10742
10738 10743 instruct sqrtD_reg(regD dst, regD src) %{
10739 10744 predicate (UseSSE<=1);
10740 10745 match(Set dst (SqrtD src));
10741 10746 format %{ "DSQRT $dst,$src" %}
10742 10747 opcode(0xFA, 0xD9);
10743 10748 ins_encode( Push_Reg_D(src),
10744 10749 OpcS, OpcP, Pop_Reg_D(dst) );
10745 10750 ins_pipe( pipe_slow );
10746 10751 %}
10747 10752
10748 10753 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10749 10754 predicate (UseSSE<=1);
10750 10755 match(Set Y (PowD X Y)); // Raise X to the Yth power
10751 10756 effect(KILL rax, KILL rbx, KILL rcx);
10752 10757 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10753 10758 "FLD_D $X\n\t"
10754 10759 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10755 10760
10756 10761 "FDUP \t\t\t# Q Q\n\t"
10757 10762 "FRNDINT\t\t\t# int(Q) Q\n\t"
10758 10763 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10759 10764 "FISTP dword [ESP]\n\t"
10760 10765 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10761 10766 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10762 10767 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10763 10768 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10764 10769 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10765 10770 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10766 10771 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10767 10772 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10768 10773 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10769 10774 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10770 10775 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10771 10776 "MOV [ESP+0],0\n\t"
10772 10777 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10773 10778
10774 10779 "ADD ESP,8"
10775 10780 %}
10776 10781 ins_encode( push_stack_temp_qword,
10777 10782 Push_Reg_D(X),
10778 10783 Opcode(0xD9), Opcode(0xF1), // fyl2x
10779 10784 pow_exp_core_encoding,
10780 10785 pop_stack_temp_qword);
10781 10786 ins_pipe( pipe_slow );
10782 10787 %}
10783 10788
10784 10789 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10785 10790 predicate (UseSSE>=2);
10786 10791 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10787 10792 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10788 10793 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10789 10794 "MOVSD [ESP],$src1\n\t"
10790 10795 "FLD FPR1,$src1\n\t"
10791 10796 "MOVSD [ESP],$src0\n\t"
10792 10797 "FLD FPR1,$src0\n\t"
10793 10798 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10794 10799
10795 10800 "FDUP \t\t\t# Q Q\n\t"
10796 10801 "FRNDINT\t\t\t# int(Q) Q\n\t"
10797 10802 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10798 10803 "FISTP dword [ESP]\n\t"
10799 10804 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10800 10805 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10801 10806 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10802 10807 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10803 10808 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10804 10809 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10805 10810 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10806 10811 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10807 10812 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10808 10813 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10809 10814 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10810 10815 "MOV [ESP+0],0\n\t"
10811 10816 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10812 10817
10813 10818 "FST_D [ESP]\n\t"
10814 10819 "MOVSD $dst,[ESP]\n\t"
10815 10820 "ADD ESP,8"
10816 10821 %}
10817 10822 ins_encode( push_stack_temp_qword,
10818 10823 push_xmm_to_fpr1(src1),
10819 10824 push_xmm_to_fpr1(src0),
10820 10825 Opcode(0xD9), Opcode(0xF1), // fyl2x
10821 10826 pow_exp_core_encoding,
10822 10827 Push_ResultXD(dst) );
10823 10828 ins_pipe( pipe_slow );
10824 10829 %}
10825 10830
10826 10831
10827 10832 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10828 10833 predicate (UseSSE<=1);
10829 10834 match(Set dpr1 (ExpD dpr1));
10830 10835 effect(KILL rax, KILL rbx, KILL rcx);
10831 10836 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding"
10832 10837 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10833 10838 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10834 10839
10835 10840 "FDUP \t\t\t# Q Q\n\t"
10836 10841 "FRNDINT\t\t\t# int(Q) Q\n\t"
10837 10842 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10838 10843 "FISTP dword [ESP]\n\t"
10839 10844 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10840 10845 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10841 10846 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10842 10847 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10843 10848 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10844 10849 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10845 10850 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10846 10851 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10847 10852 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10848 10853 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10849 10854 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10850 10855 "MOV [ESP+0],0\n\t"
10851 10856 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10852 10857
10853 10858 "ADD ESP,8"
10854 10859 %}
10855 10860 ins_encode( push_stack_temp_qword,
10856 10861 Opcode(0xD9), Opcode(0xEA), // fldl2e
10857 10862 Opcode(0xDE), Opcode(0xC9), // fmulp
10858 10863 pow_exp_core_encoding,
10859 10864 pop_stack_temp_qword);
10860 10865 ins_pipe( pipe_slow );
10861 10866 %}
10862 10867
10863 10868 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10864 10869 predicate (UseSSE>=2);
10865 10870 match(Set dst (ExpD src));
10866 10871 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10867 10872 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10868 10873 "MOVSD [ESP],$src\n\t"
10869 10874 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10870 10875 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10871 10876
10872 10877 "FDUP \t\t\t# Q Q\n\t"
10873 10878 "FRNDINT\t\t\t# int(Q) Q\n\t"
10874 10879 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10875 10880 "FISTP dword [ESP]\n\t"
10876 10881 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10877 10882 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10878 10883 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10879 10884 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10880 10885 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10881 10886 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10882 10887 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10883 10888 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10884 10889 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10885 10890 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10886 10891 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10887 10892 "MOV [ESP+0],0\n\t"
10888 10893 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10889 10894
10890 10895 "FST_D [ESP]\n\t"
10891 10896 "MOVSD $dst,[ESP]\n\t"
10892 10897 "ADD ESP,8"
10893 10898 %}
10894 10899 ins_encode( Push_SrcXD(src),
10895 10900 Opcode(0xD9), Opcode(0xEA), // fldl2e
10896 10901 Opcode(0xDE), Opcode(0xC9), // fmulp
10897 10902 pow_exp_core_encoding,
10898 10903 Push_ResultXD(dst) );
10899 10904 ins_pipe( pipe_slow );
10900 10905 %}
10901 10906
10902 10907
10903 10908
10904 10909 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
10905 10910 predicate (UseSSE<=1);
10906 10911 // The source Double operand on FPU stack
10907 10912 match(Set dst (Log10D src));
10908 10913 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10909 10914 // fxch ; swap ST(0) with ST(1)
10910 10915 // fyl2x ; compute log_10(2) * log_2(x)
10911 10916 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10912 10917 "FXCH \n\t"
10913 10918 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10914 10919 %}
10915 10920 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10916 10921 Opcode(0xD9), Opcode(0xC9), // fxch
10917 10922 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10918 10923
10919 10924 ins_pipe( pipe_slow );
10920 10925 %}
10921 10926
10922 10927 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10923 10928 predicate (UseSSE>=2);
10924 10929 effect(KILL cr);
10925 10930 match(Set dst (Log10D src));
10926 10931 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10927 10932 // fyl2x ; compute log_10(2) * log_2(x)
10928 10933 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10929 10934 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10930 10935 %}
10931 10936 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10932 10937 Push_SrcXD(src),
10933 10938 Opcode(0xD9), Opcode(0xF1), // fyl2x
10934 10939 Push_ResultXD(dst));
10935 10940
10936 10941 ins_pipe( pipe_slow );
10937 10942 %}
10938 10943
10939 10944 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
10940 10945 predicate (UseSSE<=1);
10941 10946 // The source Double operand on FPU stack
10942 10947 match(Set dst (LogD src));
10943 10948 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10944 10949 // fxch ; swap ST(0) with ST(1)
10945 10950 // fyl2x ; compute log_e(2) * log_2(x)
10946 10951 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10947 10952 "FXCH \n\t"
10948 10953 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10949 10954 %}
10950 10955 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10951 10956 Opcode(0xD9), Opcode(0xC9), // fxch
10952 10957 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10953 10958
10954 10959 ins_pipe( pipe_slow );
10955 10960 %}
10956 10961
10957 10962 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10958 10963 predicate (UseSSE>=2);
10959 10964 effect(KILL cr);
10960 10965 // The source and result Double operands in XMM registers
10961 10966 match(Set dst (LogD src));
10962 10967 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10963 10968 // fyl2x ; compute log_e(2) * log_2(x)
10964 10969 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10965 10970 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10966 10971 %}
10967 10972 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10968 10973 Push_SrcXD(src),
10969 10974 Opcode(0xD9), Opcode(0xF1), // fyl2x
10970 10975 Push_ResultXD(dst));
10971 10976 ins_pipe( pipe_slow );
10972 10977 %}
10973 10978
10974 10979 //-------------Float Instructions-------------------------------
10975 10980 // Float Math
10976 10981
10977 10982 // Code for float compare:
10978 10983 // fcompp();
10979 10984 // fwait(); fnstsw_ax();
10980 10985 // sahf();
10981 10986 // movl(dst, unordered_result);
10982 10987 // jcc(Assembler::parity, exit);
10983 10988 // movl(dst, less_result);
10984 10989 // jcc(Assembler::below, exit);
10985 10990 // movl(dst, equal_result);
10986 10991 // jcc(Assembler::equal, exit);
10987 10992 // movl(dst, greater_result);
10988 10993 // exit:
10989 10994
10990 10995 // P6 version of float compare, sets condition codes in EFLAGS
10991 10996 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10992 10997 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10993 10998 match(Set cr (CmpF src1 src2));
10994 10999 effect(KILL rax);
10995 11000 ins_cost(150);
10996 11001 format %{ "FLD $src1\n\t"
10997 11002 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10998 11003 "JNP exit\n\t"
10999 11004 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
11000 11005 "SAHF\n"
11001 11006 "exit:\tNOP // avoid branch to branch" %}
11002 11007 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11003 11008 ins_encode( Push_Reg_D(src1),
11004 11009 OpcP, RegOpc(src2),
11005 11010 cmpF_P6_fixup );
11006 11011 ins_pipe( pipe_slow );
11007 11012 %}
11008 11013
11009 11014 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
11010 11015 predicate(VM_Version::supports_cmov() && UseSSE == 0);
11011 11016 match(Set cr (CmpF src1 src2));
11012 11017 ins_cost(100);
11013 11018 format %{ "FLD $src1\n\t"
11014 11019 "FUCOMIP ST,$src2 // P6 instruction" %}
11015 11020 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11016 11021 ins_encode( Push_Reg_D(src1),
11017 11022 OpcP, RegOpc(src2));
11018 11023 ins_pipe( pipe_slow );
11019 11024 %}
11020 11025
11021 11026
11022 11027 // Compare & branch
11023 11028 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
11024 11029 predicate(UseSSE == 0);
11025 11030 match(Set cr (CmpF src1 src2));
11026 11031 effect(KILL rax);
11027 11032 ins_cost(200);
11028 11033 format %{ "FLD $src1\n\t"
11029 11034 "FCOMp $src2\n\t"
11030 11035 "FNSTSW AX\n\t"
11031 11036 "TEST AX,0x400\n\t"
11032 11037 "JZ,s flags\n\t"
11033 11038 "MOV AH,1\t# unordered treat as LT\n"
11034 11039 "flags:\tSAHF" %}
11035 11040 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11036 11041 ins_encode( Push_Reg_D(src1),
11037 11042 OpcP, RegOpc(src2),
11038 11043 fpu_flags);
11039 11044 ins_pipe( pipe_slow );
11040 11045 %}
11041 11046
11042 11047 // Compare vs zero into -1,0,1
11043 11048 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
11044 11049 predicate(UseSSE == 0);
11045 11050 match(Set dst (CmpF3 src1 zero));
11046 11051 effect(KILL cr, KILL rax);
11047 11052 ins_cost(280);
11048 11053 format %{ "FTSTF $dst,$src1" %}
11049 11054 opcode(0xE4, 0xD9);
11050 11055 ins_encode( Push_Reg_D(src1),
11051 11056 OpcS, OpcP, PopFPU,
11052 11057 CmpF_Result(dst));
11053 11058 ins_pipe( pipe_slow );
11054 11059 %}
11055 11060
11056 11061 // Compare into -1,0,1
11057 11062 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11058 11063 predicate(UseSSE == 0);
11059 11064 match(Set dst (CmpF3 src1 src2));
11060 11065 effect(KILL cr, KILL rax);
11061 11066 ins_cost(300);
11062 11067 format %{ "FCMPF $dst,$src1,$src2" %}
11063 11068 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11064 11069 ins_encode( Push_Reg_D(src1),
11065 11070 OpcP, RegOpc(src2),
11066 11071 CmpF_Result(dst));
11067 11072 ins_pipe( pipe_slow );
11068 11073 %}
11069 11074
11070 11075 // float compare and set condition codes in EFLAGS by XMM regs
11071 11076 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
11072 11077 predicate(UseSSE>=1);
11073 11078 match(Set cr (CmpF dst src));
11074 11079 effect(KILL rax);
11075 11080 ins_cost(145);
11076 11081 format %{ "COMISS $dst,$src\n"
11077 11082 "\tJNP exit\n"
11078 11083 "\tMOV ah,1 // saw a NaN, set CF\n"
11079 11084 "\tSAHF\n"
11080 11085 "exit:\tNOP // avoid branch to branch" %}
11081 11086 opcode(0x0F, 0x2F);
11082 11087 ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
11083 11088 ins_pipe( pipe_slow );
11084 11089 %}
11085 11090
11086 11091 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
11087 11092 predicate(UseSSE>=1);
11088 11093 match(Set cr (CmpF dst src));
11089 11094 ins_cost(100);
11090 11095 format %{ "COMISS $dst,$src" %}
11091 11096 opcode(0x0F, 0x2F);
11092 11097 ins_encode(OpcP, OpcS, RegReg(dst, src));
11093 11098 ins_pipe( pipe_slow );
11094 11099 %}
11095 11100
11096 11101 // float compare and set condition codes in EFLAGS by XMM regs
11097 11102 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
11098 11103 predicate(UseSSE>=1);
11099 11104 match(Set cr (CmpF dst (LoadF src)));
11100 11105 effect(KILL rax);
11101 11106 ins_cost(165);
11102 11107 format %{ "COMISS $dst,$src\n"
11103 11108 "\tJNP exit\n"
11104 11109 "\tMOV ah,1 // saw a NaN, set CF\n"
11105 11110 "\tSAHF\n"
11106 11111 "exit:\tNOP // avoid branch to branch" %}
11107 11112 opcode(0x0F, 0x2F);
11108 11113 ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
11109 11114 ins_pipe( pipe_slow );
11110 11115 %}
11111 11116
11112 11117 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
11113 11118 predicate(UseSSE>=1);
11114 11119 match(Set cr (CmpF dst (LoadF src)));
11115 11120 ins_cost(100);
11116 11121 format %{ "COMISS $dst,$src" %}
11117 11122 opcode(0x0F, 0x2F);
11118 11123 ins_encode(OpcP, OpcS, RegMem(dst, src));
11119 11124 ins_pipe( pipe_slow );
11120 11125 %}
11121 11126
11122 11127 // Compare into -1,0,1 in XMM
11123 11128 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
11124 11129 predicate(UseSSE>=1);
11125 11130 match(Set dst (CmpF3 src1 src2));
11126 11131 effect(KILL cr);
11127 11132 ins_cost(255);
11128 11133 format %{ "XOR $dst,$dst\n"
11129 11134 "\tCOMISS $src1,$src2\n"
11130 11135 "\tJP,s nan\n"
11131 11136 "\tJEQ,s exit\n"
11132 11137 "\tJA,s inc\n"
11133 11138 "nan:\tDEC $dst\n"
11134 11139 "\tJMP,s exit\n"
11135 11140 "inc:\tINC $dst\n"
11136 11141 "exit:"
11137 11142 %}
11138 11143 opcode(0x0F, 0x2F);
11139 11144 ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
11140 11145 ins_pipe( pipe_slow );
11141 11146 %}
11142 11147
11143 11148 // Compare into -1,0,1 in XMM and memory
11144 11149 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
11145 11150 predicate(UseSSE>=1);
11146 11151 match(Set dst (CmpF3 src1 (LoadF mem)));
11147 11152 effect(KILL cr);
11148 11153 ins_cost(275);
11149 11154 format %{ "COMISS $src1,$mem\n"
11150 11155 "\tMOV $dst,0\t\t# do not blow flags\n"
11151 11156 "\tJP,s nan\n"
11152 11157 "\tJEQ,s exit\n"
11153 11158 "\tJA,s inc\n"
11154 11159 "nan:\tDEC $dst\n"
11155 11160 "\tJMP,s exit\n"
11156 11161 "inc:\tINC $dst\n"
11157 11162 "exit:"
11158 11163 %}
11159 11164 opcode(0x0F, 0x2F);
11160 11165 ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
11161 11166 ins_pipe( pipe_slow );
11162 11167 %}
11163 11168
11164 11169 // Spill to obtain 24-bit precision
11165 11170 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
11166 11171 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11167 11172 match(Set dst (SubF src1 src2));
11168 11173
11169 11174 format %{ "FSUB $dst,$src1 - $src2" %}
11170 11175 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
11171 11176 ins_encode( Push_Reg_F(src1),
11172 11177 OpcReg_F(src2),
11173 11178 Pop_Mem_F(dst) );
11174 11179 ins_pipe( fpu_mem_reg_reg );
11175 11180 %}
11176 11181 //
11177 11182 // This instruction does not round to 24-bits
11178 11183 instruct subF_reg(regF dst, regF src) %{
11179 11184 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11180 11185 match(Set dst (SubF dst src));
11181 11186
11182 11187 format %{ "FSUB $dst,$src" %}
11183 11188 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
11184 11189 ins_encode( Push_Reg_F(src),
11185 11190 OpcP, RegOpc(dst) );
11186 11191 ins_pipe( fpu_reg_reg );
11187 11192 %}
11188 11193
11189 11194 // Spill to obtain 24-bit precision
11190 11195 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
11191 11196 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11192 11197 match(Set dst (AddF src1 src2));
11193 11198
11194 11199 format %{ "FADD $dst,$src1,$src2" %}
11195 11200 opcode(0xD8, 0x0); /* D8 C0+i */
11196 11201 ins_encode( Push_Reg_F(src2),
11197 11202 OpcReg_F(src1),
11198 11203 Pop_Mem_F(dst) );
11199 11204 ins_pipe( fpu_mem_reg_reg );
11200 11205 %}
11201 11206 //
11202 11207 // This instruction does not round to 24-bits
11203 11208 instruct addF_reg(regF dst, regF src) %{
11204 11209 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11205 11210 match(Set dst (AddF dst src));
11206 11211
11207 11212 format %{ "FLD $src\n\t"
11208 11213 "FADDp $dst,ST" %}
11209 11214 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
11210 11215 ins_encode( Push_Reg_F(src),
11211 11216 OpcP, RegOpc(dst) );
11212 11217 ins_pipe( fpu_reg_reg );
11213 11218 %}
11214 11219
11215 11220 // Add two single precision floating point values in xmm
11216 11221 instruct addX_reg(regX dst, regX src) %{
↓ open down ↓ |
713 lines elided |
↑ open up ↑ |
11217 11222 predicate(UseSSE>=1);
11218 11223 match(Set dst (AddF dst src));
11219 11224 format %{ "ADDSS $dst,$src" %}
11220 11225 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
11221 11226 ins_pipe( pipe_slow );
11222 11227 %}
11223 11228
11224 11229 instruct addX_imm(regX dst, immXF con) %{
11225 11230 predicate(UseSSE>=1);
11226 11231 match(Set dst (AddF dst con));
11227 - format %{ "ADDSS $dst,[$con]" %}
11228 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
11229 - ins_pipe( pipe_slow );
11232 + format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11233 + ins_encode %{
11234 + __ addss($dst$$XMMRegister, $constantaddress($con));
11235 + %}
11236 + ins_pipe(pipe_slow);
11230 11237 %}
11231 11238
11232 11239 instruct addX_mem(regX dst, memory mem) %{
11233 11240 predicate(UseSSE>=1);
11234 11241 match(Set dst (AddF dst (LoadF mem)));
11235 11242 format %{ "ADDSS $dst,$mem" %}
11236 11243 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
11237 11244 ins_pipe( pipe_slow );
11238 11245 %}
11239 11246
11240 11247 // Subtract two single precision floating point values in xmm
11241 11248 instruct subX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11242 11249 predicate(UseSSE>=1);
11243 11250 match(Set dst (SubF dst src));
11244 11251 format %{ "SUBSS $dst,$src" %}
11245 11252 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
11246 11253 ins_pipe( pipe_slow );
11247 11254 %}
11248 11255
11249 11256 instruct subX_imm(regX dst, immXF con) %{
11250 11257 predicate(UseSSE>=1);
11251 11258 match(Set dst (SubF dst con));
11252 - format %{ "SUBSS $dst,[$con]" %}
11253 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
11254 - ins_pipe( pipe_slow );
11259 + format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11260 + ins_encode %{
11261 + __ subss($dst$$XMMRegister, $constantaddress($con));
11262 + %}
11263 + ins_pipe(pipe_slow);
11255 11264 %}
11256 11265
11257 11266 instruct subX_mem(regX dst, memory mem) %{
11258 11267 predicate(UseSSE>=1);
11259 11268 match(Set dst (SubF dst (LoadF mem)));
11260 11269 format %{ "SUBSS $dst,$mem" %}
11261 11270 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
11262 11271 ins_pipe( pipe_slow );
11263 11272 %}
11264 11273
11265 11274 // Multiply two single precision floating point values in xmm
11266 11275 instruct mulX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11267 11276 predicate(UseSSE>=1);
11268 11277 match(Set dst (MulF dst src));
11269 11278 format %{ "MULSS $dst,$src" %}
11270 11279 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
11271 11280 ins_pipe( pipe_slow );
11272 11281 %}
11273 11282
11274 11283 instruct mulX_imm(regX dst, immXF con) %{
11275 11284 predicate(UseSSE>=1);
11276 11285 match(Set dst (MulF dst con));
11277 - format %{ "MULSS $dst,[$con]" %}
11278 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
11279 - ins_pipe( pipe_slow );
11286 + format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11287 + ins_encode %{
11288 + __ mulss($dst$$XMMRegister, $constantaddress($con));
11289 + %}
11290 + ins_pipe(pipe_slow);
11280 11291 %}
11281 11292
11282 11293 instruct mulX_mem(regX dst, memory mem) %{
11283 11294 predicate(UseSSE>=1);
11284 11295 match(Set dst (MulF dst (LoadF mem)));
11285 11296 format %{ "MULSS $dst,$mem" %}
11286 11297 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
11287 11298 ins_pipe( pipe_slow );
11288 11299 %}
11289 11300
11290 11301 // Divide two single precision floating point values in xmm
11291 11302 instruct divX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11292 11303 predicate(UseSSE>=1);
11293 11304 match(Set dst (DivF dst src));
11294 11305 format %{ "DIVSS $dst,$src" %}
11295 11306 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
11296 11307 ins_pipe( pipe_slow );
11297 11308 %}
11298 11309
11299 11310 instruct divX_imm(regX dst, immXF con) %{
11300 11311 predicate(UseSSE>=1);
11301 11312 match(Set dst (DivF dst con));
11302 - format %{ "DIVSS $dst,[$con]" %}
11303 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
11304 - ins_pipe( pipe_slow );
11313 + format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11314 + ins_encode %{
11315 + __ divss($dst$$XMMRegister, $constantaddress($con));
11316 + %}
11317 + ins_pipe(pipe_slow);
11305 11318 %}
11306 11319
11307 11320 instruct divX_mem(regX dst, memory mem) %{
11308 11321 predicate(UseSSE>=1);
11309 11322 match(Set dst (DivF dst (LoadF mem)));
11310 11323 format %{ "DIVSS $dst,$mem" %}
11311 11324 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
11312 11325 ins_pipe( pipe_slow );
11313 11326 %}
11314 11327
11315 11328 // Get the square root of a single precision floating point values in xmm
11316 11329 instruct sqrtX_reg(regX dst, regX src) %{
11317 11330 predicate(UseSSE>=1);
11318 11331 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11319 11332 format %{ "SQRTSS $dst,$src" %}
11320 11333 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11321 11334 ins_pipe( pipe_slow );
11322 11335 %}
11323 11336
11324 11337 instruct sqrtX_mem(regX dst, memory mem) %{
11325 11338 predicate(UseSSE>=1);
11326 11339 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
11327 11340 format %{ "SQRTSS $dst,$mem" %}
11328 11341 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11329 11342 ins_pipe( pipe_slow );
11330 11343 %}
11331 11344
11332 11345 // Get the square root of a double precision floating point values in xmm
11333 11346 instruct sqrtXD_reg(regXD dst, regXD src) %{
11334 11347 predicate(UseSSE>=2);
11335 11348 match(Set dst (SqrtD src));
11336 11349 format %{ "SQRTSD $dst,$src" %}
11337 11350 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11338 11351 ins_pipe( pipe_slow );
11339 11352 %}
11340 11353
11341 11354 instruct sqrtXD_mem(regXD dst, memory mem) %{
11342 11355 predicate(UseSSE>=2);
11343 11356 match(Set dst (SqrtD (LoadD mem)));
11344 11357 format %{ "SQRTSD $dst,$mem" %}
11345 11358 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11346 11359 ins_pipe( pipe_slow );
11347 11360 %}
11348 11361
11349 11362 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11350 11363 predicate(UseSSE==0);
11351 11364 match(Set dst (AbsF src));
11352 11365 ins_cost(100);
11353 11366 format %{ "FABS" %}
11354 11367 opcode(0xE1, 0xD9);
11355 11368 ins_encode( OpcS, OpcP );
11356 11369 ins_pipe( fpu_reg_reg );
11357 11370 %}
11358 11371
11359 11372 instruct absX_reg(regX dst ) %{
11360 11373 predicate(UseSSE>=1);
11361 11374 match(Set dst (AbsF dst));
11362 11375 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11363 11376 ins_encode( AbsXF_encoding(dst));
11364 11377 ins_pipe( pipe_slow );
11365 11378 %}
11366 11379
11367 11380 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11368 11381 predicate(UseSSE==0);
11369 11382 match(Set dst (NegF src));
11370 11383 ins_cost(100);
11371 11384 format %{ "FCHS" %}
11372 11385 opcode(0xE0, 0xD9);
11373 11386 ins_encode( OpcS, OpcP );
11374 11387 ins_pipe( fpu_reg_reg );
11375 11388 %}
11376 11389
11377 11390 instruct negX_reg( regX dst ) %{
11378 11391 predicate(UseSSE>=1);
11379 11392 match(Set dst (NegF dst));
11380 11393 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11381 11394 ins_encode( NegXF_encoding(dst));
11382 11395 ins_pipe( pipe_slow );
11383 11396 %}
11384 11397
11385 11398 // Cisc-alternate to addF_reg
11386 11399 // Spill to obtain 24-bit precision
11387 11400 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11388 11401 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11389 11402 match(Set dst (AddF src1 (LoadF src2)));
11390 11403
11391 11404 format %{ "FLD $src2\n\t"
11392 11405 "FADD ST,$src1\n\t"
11393 11406 "FSTP_S $dst" %}
11394 11407 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11395 11408 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11396 11409 OpcReg_F(src1),
11397 11410 Pop_Mem_F(dst) );
11398 11411 ins_pipe( fpu_mem_reg_mem );
11399 11412 %}
11400 11413 //
11401 11414 // Cisc-alternate to addF_reg
11402 11415 // This instruction does not round to 24-bits
11403 11416 instruct addF_reg_mem(regF dst, memory src) %{
11404 11417 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11405 11418 match(Set dst (AddF dst (LoadF src)));
11406 11419
11407 11420 format %{ "FADD $dst,$src" %}
11408 11421 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11409 11422 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11410 11423 OpcP, RegOpc(dst) );
11411 11424 ins_pipe( fpu_reg_mem );
11412 11425 %}
11413 11426
11414 11427 // // Following two instructions for _222_mpegaudio
11415 11428 // Spill to obtain 24-bit precision
11416 11429 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11417 11430 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11418 11431 match(Set dst (AddF src1 src2));
11419 11432
11420 11433 format %{ "FADD $dst,$src1,$src2" %}
11421 11434 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11422 11435 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11423 11436 OpcReg_F(src2),
11424 11437 Pop_Mem_F(dst) );
11425 11438 ins_pipe( fpu_mem_reg_mem );
11426 11439 %}
11427 11440
11428 11441 // Cisc-spill variant
11429 11442 // Spill to obtain 24-bit precision
11430 11443 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11431 11444 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11432 11445 match(Set dst (AddF src1 (LoadF src2)));
11433 11446
11434 11447 format %{ "FADD $dst,$src1,$src2 cisc" %}
11435 11448 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11436 11449 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11437 11450 set_instruction_start,
11438 11451 OpcP, RMopc_Mem(secondary,src1),
11439 11452 Pop_Mem_F(dst) );
11440 11453 ins_pipe( fpu_mem_mem_mem );
11441 11454 %}
11442 11455
11443 11456 // Spill to obtain 24-bit precision
11444 11457 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11445 11458 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11446 11459 match(Set dst (AddF src1 src2));
11447 11460
11448 11461 format %{ "FADD $dst,$src1,$src2" %}
↓ open down ↓ |
134 lines elided |
↑ open up ↑ |
11449 11462 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11450 11463 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11451 11464 set_instruction_start,
11452 11465 OpcP, RMopc_Mem(secondary,src1),
11453 11466 Pop_Mem_F(dst) );
11454 11467 ins_pipe( fpu_mem_mem_mem );
11455 11468 %}
11456 11469
11457 11470
11458 11471 // Spill to obtain 24-bit precision
11459 -instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11472 +instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11460 11473 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11461 - match(Set dst (AddF src1 src2));
11462 - format %{ "FLD $src1\n\t"
11463 - "FADD $src2\n\t"
11474 + match(Set dst (AddF src con));
11475 + format %{ "FLD $src\n\t"
11476 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11464 11477 "FSTP_S $dst" %}
11465 - opcode(0xD8, 0x00); /* D8 /0 */
11466 - ins_encode( Push_Reg_F(src1),
11467 - Opc_MemImm_F(src2),
11468 - Pop_Mem_F(dst));
11469 - ins_pipe( fpu_mem_reg_con );
11478 + ins_encode %{
11479 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11480 + __ fadd_s($constantaddress($con));
11481 + __ fstp_s(Address(rsp, $dst$$disp));
11482 + %}
11483 + ins_pipe(fpu_mem_reg_con);
11470 11484 %}
11471 11485 //
11472 11486 // This instruction does not round to 24-bits
11473 -instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11487 +instruct addF_reg_imm(regF dst, regF src, immF con) %{
11474 11488 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11475 - match(Set dst (AddF src1 src2));
11476 - format %{ "FLD $src1\n\t"
11477 - "FADD $src2\n\t"
11478 - "FSTP_S $dst" %}
11479 - opcode(0xD8, 0x00); /* D8 /0 */
11480 - ins_encode( Push_Reg_F(src1),
11481 - Opc_MemImm_F(src2),
11482 - Pop_Reg_F(dst));
11483 - ins_pipe( fpu_reg_reg_con );
11489 + match(Set dst (AddF src con));
11490 + format %{ "FLD $src\n\t"
11491 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11492 + "FSTP $dst" %}
11493 + ins_encode %{
11494 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11495 + __ fadd_s($constantaddress($con));
11496 + __ fstp_d($dst$$reg);
11497 + %}
11498 + ins_pipe(fpu_reg_reg_con);
11484 11499 %}
11485 11500
11486 11501 // Spill to obtain 24-bit precision
11487 11502 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11488 11503 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11489 11504 match(Set dst (MulF src1 src2));
11490 11505
11491 11506 format %{ "FLD $src1\n\t"
11492 11507 "FMUL $src2\n\t"
11493 11508 "FSTP_S $dst" %}
11494 11509 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11495 11510 ins_encode( Push_Reg_F(src1),
11496 11511 OpcReg_F(src2),
11497 11512 Pop_Mem_F(dst) );
11498 11513 ins_pipe( fpu_mem_reg_reg );
11499 11514 %}
11500 11515 //
11501 11516 // This instruction does not round to 24-bits
11502 11517 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11503 11518 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11504 11519 match(Set dst (MulF src1 src2));
11505 11520
11506 11521 format %{ "FLD $src1\n\t"
11507 11522 "FMUL $src2\n\t"
11508 11523 "FSTP_S $dst" %}
11509 11524 opcode(0xD8, 0x1); /* D8 C8+i */
11510 11525 ins_encode( Push_Reg_F(src2),
11511 11526 OpcReg_F(src1),
11512 11527 Pop_Reg_F(dst) );
11513 11528 ins_pipe( fpu_reg_reg_reg );
11514 11529 %}
11515 11530
11516 11531
11517 11532 // Spill to obtain 24-bit precision
11518 11533 // Cisc-alternate to reg-reg multiply
11519 11534 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11520 11535 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11521 11536 match(Set dst (MulF src1 (LoadF src2)));
11522 11537
11523 11538 format %{ "FLD_S $src2\n\t"
11524 11539 "FMUL $src1\n\t"
11525 11540 "FSTP_S $dst" %}
11526 11541 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11527 11542 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11528 11543 OpcReg_F(src1),
11529 11544 Pop_Mem_F(dst) );
11530 11545 ins_pipe( fpu_mem_reg_mem );
11531 11546 %}
11532 11547 //
11533 11548 // This instruction does not round to 24-bits
11534 11549 // Cisc-alternate to reg-reg multiply
11535 11550 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11536 11551 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11537 11552 match(Set dst (MulF src1 (LoadF src2)));
11538 11553
11539 11554 format %{ "FMUL $dst,$src1,$src2" %}
11540 11555 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11541 11556 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11542 11557 OpcReg_F(src1),
11543 11558 Pop_Reg_F(dst) );
11544 11559 ins_pipe( fpu_reg_reg_mem );
11545 11560 %}
11546 11561
11547 11562 // Spill to obtain 24-bit precision
11548 11563 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11549 11564 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11550 11565 match(Set dst (MulF src1 src2));
11551 11566
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
11552 11567 format %{ "FMUL $dst,$src1,$src2" %}
11553 11568 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11554 11569 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11555 11570 set_instruction_start,
11556 11571 OpcP, RMopc_Mem(secondary,src1),
11557 11572 Pop_Mem_F(dst) );
11558 11573 ins_pipe( fpu_mem_mem_mem );
11559 11574 %}
11560 11575
11561 11576 // Spill to obtain 24-bit precision
11562 -instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11577 +instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11563 11578 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11564 - match(Set dst (MulF src1 src2));
11579 + match(Set dst (MulF src con));
11565 11580
11566 - format %{ "FMULc $dst,$src1,$src2" %}
11567 - opcode(0xD8, 0x1); /* D8 /1*/
11568 - ins_encode( Push_Reg_F(src1),
11569 - Opc_MemImm_F(src2),
11570 - Pop_Mem_F(dst));
11571 - ins_pipe( fpu_mem_reg_con );
11581 + format %{ "FLD $src\n\t"
11582 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11583 + "FSTP_S $dst" %}
11584 + ins_encode %{
11585 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11586 + __ fmul_s($constantaddress($con));
11587 + __ fstp_s(Address(rsp, $dst$$disp));
11588 + %}
11589 + ins_pipe(fpu_mem_reg_con);
11572 11590 %}
11573 11591 //
11574 11592 // This instruction does not round to 24-bits
11575 -instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11593 +instruct mulF_reg_imm(regF dst, regF src, immF con) %{
11576 11594 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11577 - match(Set dst (MulF src1 src2));
11595 + match(Set dst (MulF src con));
11578 11596
11579 - format %{ "FMULc $dst. $src1, $src2" %}
11580 - opcode(0xD8, 0x1); /* D8 /1*/
11581 - ins_encode( Push_Reg_F(src1),
11582 - Opc_MemImm_F(src2),
11583 - Pop_Reg_F(dst));
11584 - ins_pipe( fpu_reg_reg_con );
11597 + format %{ "FLD $src\n\t"
11598 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11599 + "FSTP $dst" %}
11600 + ins_encode %{
11601 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11602 + __ fmul_s($constantaddress($con));
11603 + __ fstp_d($dst$$reg);
11604 + %}
11605 + ins_pipe(fpu_reg_reg_con);
11585 11606 %}
11586 11607
11587 11608
11588 11609 //
11589 11610 // MACRO1 -- subsume unshared load into mulF
11590 11611 // This instruction does not round to 24-bits
11591 11612 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11592 11613 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11593 11614 match(Set dst (MulF (LoadF mem1) src));
11594 11615
11595 11616 format %{ "FLD $mem1 ===MACRO1===\n\t"
11596 11617 "FMUL ST,$src\n\t"
11597 11618 "FSTP $dst" %}
11598 11619 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11599 11620 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11600 11621 OpcReg_F(src),
11601 11622 Pop_Reg_F(dst) );
11602 11623 ins_pipe( fpu_reg_reg_mem );
11603 11624 %}
11604 11625 //
11605 11626 // MACRO2 -- addF a mulF which subsumed an unshared load
11606 11627 // This instruction does not round to 24-bits
11607 11628 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11608 11629 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11609 11630 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11610 11631 ins_cost(95);
11611 11632
11612 11633 format %{ "FLD $mem1 ===MACRO2===\n\t"
11613 11634 "FMUL ST,$src1 subsume mulF left load\n\t"
11614 11635 "FADD ST,$src2\n\t"
11615 11636 "FSTP $dst" %}
11616 11637 opcode(0xD9); /* LoadF D9 /0 */
11617 11638 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11618 11639 FMul_ST_reg(src1),
11619 11640 FAdd_ST_reg(src2),
11620 11641 Pop_Reg_F(dst) );
11621 11642 ins_pipe( fpu_reg_mem_reg_reg );
11622 11643 %}
11623 11644
11624 11645 // MACRO3 -- addF a mulF
11625 11646 // This instruction does not round to 24-bits. It is a '2-address'
11626 11647 // instruction in that the result goes back to src2. This eliminates
11627 11648 // a move from the macro; possibly the register allocator will have
11628 11649 // to add it back (and maybe not).
11629 11650 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11630 11651 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11631 11652 match(Set src2 (AddF (MulF src0 src1) src2));
11632 11653
11633 11654 format %{ "FLD $src0 ===MACRO3===\n\t"
11634 11655 "FMUL ST,$src1\n\t"
11635 11656 "FADDP $src2,ST" %}
11636 11657 opcode(0xD9); /* LoadF D9 /0 */
11637 11658 ins_encode( Push_Reg_F(src0),
11638 11659 FMul_ST_reg(src1),
11639 11660 FAddP_reg_ST(src2) );
11640 11661 ins_pipe( fpu_reg_reg_reg );
11641 11662 %}
11642 11663
11643 11664 // MACRO4 -- divF subF
11644 11665 // This instruction does not round to 24-bits
11645 11666 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11646 11667 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11647 11668 match(Set dst (DivF (SubF src2 src1) src3));
11648 11669
11649 11670 format %{ "FLD $src2 ===MACRO4===\n\t"
11650 11671 "FSUB ST,$src1\n\t"
11651 11672 "FDIV ST,$src3\n\t"
11652 11673 "FSTP $dst" %}
11653 11674 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11654 11675 ins_encode( Push_Reg_F(src2),
11655 11676 subF_divF_encode(src1,src3),
11656 11677 Pop_Reg_F(dst) );
11657 11678 ins_pipe( fpu_reg_reg_reg_reg );
11658 11679 %}
11659 11680
11660 11681 // Spill to obtain 24-bit precision
11661 11682 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11662 11683 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11663 11684 match(Set dst (DivF src1 src2));
11664 11685
11665 11686 format %{ "FDIV $dst,$src1,$src2" %}
11666 11687 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11667 11688 ins_encode( Push_Reg_F(src1),
11668 11689 OpcReg_F(src2),
11669 11690 Pop_Mem_F(dst) );
11670 11691 ins_pipe( fpu_mem_reg_reg );
11671 11692 %}
11672 11693 //
11673 11694 // This instruction does not round to 24-bits
11674 11695 instruct divF_reg(regF dst, regF src) %{
11675 11696 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11676 11697 match(Set dst (DivF dst src));
11677 11698
11678 11699 format %{ "FDIV $dst,$src" %}
11679 11700 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11680 11701 ins_encode( Push_Reg_F(src),
11681 11702 OpcP, RegOpc(dst) );
11682 11703 ins_pipe( fpu_reg_reg );
11683 11704 %}
11684 11705
11685 11706
11686 11707 // Spill to obtain 24-bit precision
11687 11708 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11688 11709 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11689 11710 match(Set dst (ModF src1 src2));
11690 11711 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11691 11712
11692 11713 format %{ "FMOD $dst,$src1,$src2" %}
11693 11714 ins_encode( Push_Reg_Mod_D(src1, src2),
11694 11715 emitModD(),
11695 11716 Push_Result_Mod_D(src2),
11696 11717 Pop_Mem_F(dst));
11697 11718 ins_pipe( pipe_slow );
11698 11719 %}
11699 11720 //
11700 11721 // This instruction does not round to 24-bits
11701 11722 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11702 11723 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11703 11724 match(Set dst (ModF dst src));
11704 11725 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11705 11726
11706 11727 format %{ "FMOD $dst,$src" %}
11707 11728 ins_encode(Push_Reg_Mod_D(dst, src),
11708 11729 emitModD(),
11709 11730 Push_Result_Mod_D(src),
11710 11731 Pop_Reg_F(dst));
11711 11732 ins_pipe( pipe_slow );
11712 11733 %}
11713 11734
11714 11735 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11715 11736 predicate(UseSSE>=1);
11716 11737 match(Set dst (ModF src0 src1));
11717 11738 effect(KILL rax, KILL cr);
11718 11739 format %{ "SUB ESP,4\t # FMOD\n"
11719 11740 "\tMOVSS [ESP+0],$src1\n"
11720 11741 "\tFLD_S [ESP+0]\n"
11721 11742 "\tMOVSS [ESP+0],$src0\n"
11722 11743 "\tFLD_S [ESP+0]\n"
11723 11744 "loop:\tFPREM\n"
11724 11745 "\tFWAIT\n"
11725 11746 "\tFNSTSW AX\n"
11726 11747 "\tSAHF\n"
11727 11748 "\tJP loop\n"
11728 11749 "\tFSTP_S [ESP+0]\n"
11729 11750 "\tMOVSS $dst,[ESP+0]\n"
11730 11751 "\tADD ESP,4\n"
11731 11752 "\tFSTP ST0\t # Restore FPU Stack"
11732 11753 %}
11733 11754 ins_cost(250);
11734 11755 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11735 11756 ins_pipe( pipe_slow );
11736 11757 %}
11737 11758
11738 11759
11739 11760 //----------Arithmetic Conversion Instructions---------------------------------
11740 11761 // The conversions operations are all Alpha sorted. Please keep it that way!
11741 11762
11742 11763 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11743 11764 predicate(UseSSE==0);
11744 11765 match(Set dst (RoundFloat src));
11745 11766 ins_cost(125);
11746 11767 format %{ "FST_S $dst,$src\t# F-round" %}
11747 11768 ins_encode( Pop_Mem_Reg_F(dst, src) );
11748 11769 ins_pipe( fpu_mem_reg );
11749 11770 %}
11750 11771
11751 11772 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11752 11773 predicate(UseSSE<=1);
11753 11774 match(Set dst (RoundDouble src));
11754 11775 ins_cost(125);
11755 11776 format %{ "FST_D $dst,$src\t# D-round" %}
11756 11777 ins_encode( Pop_Mem_Reg_D(dst, src) );
11757 11778 ins_pipe( fpu_mem_reg );
11758 11779 %}
11759 11780
11760 11781 // Force rounding to 24-bit precision and 6-bit exponent
11761 11782 instruct convD2F_reg(stackSlotF dst, regD src) %{
11762 11783 predicate(UseSSE==0);
11763 11784 match(Set dst (ConvD2F src));
11764 11785 format %{ "FST_S $dst,$src\t# F-round" %}
11765 11786 expand %{
11766 11787 roundFloat_mem_reg(dst,src);
11767 11788 %}
11768 11789 %}
11769 11790
11770 11791 // Force rounding to 24-bit precision and 6-bit exponent
11771 11792 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11772 11793 predicate(UseSSE==1);
11773 11794 match(Set dst (ConvD2F src));
11774 11795 effect( KILL cr );
11775 11796 format %{ "SUB ESP,4\n\t"
11776 11797 "FST_S [ESP],$src\t# F-round\n\t"
11777 11798 "MOVSS $dst,[ESP]\n\t"
11778 11799 "ADD ESP,4" %}
11779 11800 ins_encode( D2X_encoding(dst, src) );
11780 11801 ins_pipe( pipe_slow );
11781 11802 %}
11782 11803
11783 11804 // Force rounding double precision to single precision
11784 11805 instruct convXD2X_reg(regX dst, regXD src) %{
11785 11806 predicate(UseSSE>=2);
11786 11807 match(Set dst (ConvD2F src));
11787 11808 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11788 11809 opcode(0xF2, 0x0F, 0x5A);
11789 11810 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11790 11811 ins_pipe( pipe_slow );
11791 11812 %}
11792 11813
11793 11814 instruct convF2D_reg_reg(regD dst, regF src) %{
11794 11815 predicate(UseSSE==0);
11795 11816 match(Set dst (ConvF2D src));
11796 11817 format %{ "FST_S $dst,$src\t# D-round" %}
11797 11818 ins_encode( Pop_Reg_Reg_D(dst, src));
11798 11819 ins_pipe( fpu_reg_reg );
11799 11820 %}
11800 11821
11801 11822 instruct convF2D_reg(stackSlotD dst, regF src) %{
11802 11823 predicate(UseSSE==1);
11803 11824 match(Set dst (ConvF2D src));
11804 11825 format %{ "FST_D $dst,$src\t# D-round" %}
11805 11826 expand %{
11806 11827 roundDouble_mem_reg(dst,src);
11807 11828 %}
11808 11829 %}
11809 11830
11810 11831 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11811 11832 predicate(UseSSE==1);
11812 11833 match(Set dst (ConvF2D src));
11813 11834 effect( KILL cr );
11814 11835 format %{ "SUB ESP,4\n\t"
11815 11836 "MOVSS [ESP] $src\n\t"
11816 11837 "FLD_S [ESP]\n\t"
11817 11838 "ADD ESP,4\n\t"
11818 11839 "FSTP $dst\t# D-round" %}
11819 11840 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11820 11841 ins_pipe( pipe_slow );
11821 11842 %}
11822 11843
11823 11844 instruct convX2XD_reg(regXD dst, regX src) %{
11824 11845 predicate(UseSSE>=2);
11825 11846 match(Set dst (ConvF2D src));
11826 11847 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11827 11848 opcode(0xF3, 0x0F, 0x5A);
11828 11849 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11829 11850 ins_pipe( pipe_slow );
11830 11851 %}
11831 11852
11832 11853 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11833 11854 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11834 11855 predicate(UseSSE<=1);
11835 11856 match(Set dst (ConvD2I src));
11836 11857 effect( KILL tmp, KILL cr );
11837 11858 format %{ "FLD $src\t# Convert double to int \n\t"
11838 11859 "FLDCW trunc mode\n\t"
11839 11860 "SUB ESP,4\n\t"
11840 11861 "FISTp [ESP + #0]\n\t"
11841 11862 "FLDCW std/24-bit mode\n\t"
11842 11863 "POP EAX\n\t"
11843 11864 "CMP EAX,0x80000000\n\t"
11844 11865 "JNE,s fast\n\t"
11845 11866 "FLD_D $src\n\t"
11846 11867 "CALL d2i_wrapper\n"
11847 11868 "fast:" %}
11848 11869 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11849 11870 ins_pipe( pipe_slow );
11850 11871 %}
11851 11872
11852 11873 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11853 11874 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11854 11875 predicate(UseSSE>=2);
11855 11876 match(Set dst (ConvD2I src));
11856 11877 effect( KILL tmp, KILL cr );
11857 11878 format %{ "CVTTSD2SI $dst, $src\n\t"
11858 11879 "CMP $dst,0x80000000\n\t"
11859 11880 "JNE,s fast\n\t"
11860 11881 "SUB ESP, 8\n\t"
11861 11882 "MOVSD [ESP], $src\n\t"
11862 11883 "FLD_D [ESP]\n\t"
11863 11884 "ADD ESP, 8\n\t"
11864 11885 "CALL d2i_wrapper\n"
11865 11886 "fast:" %}
11866 11887 opcode(0x1); // double-precision conversion
11867 11888 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11868 11889 ins_pipe( pipe_slow );
11869 11890 %}
11870 11891
11871 11892 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11872 11893 predicate(UseSSE<=1);
11873 11894 match(Set dst (ConvD2L src));
11874 11895 effect( KILL cr );
11875 11896 format %{ "FLD $src\t# Convert double to long\n\t"
11876 11897 "FLDCW trunc mode\n\t"
11877 11898 "SUB ESP,8\n\t"
11878 11899 "FISTp [ESP + #0]\n\t"
11879 11900 "FLDCW std/24-bit mode\n\t"
11880 11901 "POP EAX\n\t"
11881 11902 "POP EDX\n\t"
11882 11903 "CMP EDX,0x80000000\n\t"
11883 11904 "JNE,s fast\n\t"
11884 11905 "TEST EAX,EAX\n\t"
11885 11906 "JNE,s fast\n\t"
11886 11907 "FLD $src\n\t"
11887 11908 "CALL d2l_wrapper\n"
11888 11909 "fast:" %}
11889 11910 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11890 11911 ins_pipe( pipe_slow );
11891 11912 %}
11892 11913
11893 11914 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11894 11915 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11895 11916 predicate (UseSSE>=2);
11896 11917 match(Set dst (ConvD2L src));
11897 11918 effect( KILL cr );
11898 11919 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11899 11920 "MOVSD [ESP],$src\n\t"
11900 11921 "FLD_D [ESP]\n\t"
11901 11922 "FLDCW trunc mode\n\t"
11902 11923 "FISTp [ESP + #0]\n\t"
11903 11924 "FLDCW std/24-bit mode\n\t"
11904 11925 "POP EAX\n\t"
11905 11926 "POP EDX\n\t"
11906 11927 "CMP EDX,0x80000000\n\t"
11907 11928 "JNE,s fast\n\t"
11908 11929 "TEST EAX,EAX\n\t"
11909 11930 "JNE,s fast\n\t"
11910 11931 "SUB ESP,8\n\t"
11911 11932 "MOVSD [ESP],$src\n\t"
11912 11933 "FLD_D [ESP]\n\t"
11913 11934 "CALL d2l_wrapper\n"
11914 11935 "fast:" %}
11915 11936 ins_encode( XD2L_encoding(src) );
11916 11937 ins_pipe( pipe_slow );
11917 11938 %}
11918 11939
11919 11940 // Convert a double to an int. Java semantics require we do complex
11920 11941 // manglations in the corner cases. So we set the rounding mode to
11921 11942 // 'zero', store the darned double down as an int, and reset the
11922 11943 // rounding mode to 'nearest'. The hardware stores a flag value down
11923 11944 // if we would overflow or converted a NAN; we check for this and
11924 11945 // and go the slow path if needed.
11925 11946 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11926 11947 predicate(UseSSE==0);
11927 11948 match(Set dst (ConvF2I src));
11928 11949 effect( KILL tmp, KILL cr );
11929 11950 format %{ "FLD $src\t# Convert float to int \n\t"
11930 11951 "FLDCW trunc mode\n\t"
11931 11952 "SUB ESP,4\n\t"
11932 11953 "FISTp [ESP + #0]\n\t"
11933 11954 "FLDCW std/24-bit mode\n\t"
11934 11955 "POP EAX\n\t"
11935 11956 "CMP EAX,0x80000000\n\t"
11936 11957 "JNE,s fast\n\t"
11937 11958 "FLD $src\n\t"
11938 11959 "CALL d2i_wrapper\n"
11939 11960 "fast:" %}
11940 11961 // D2I_encoding works for F2I
11941 11962 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11942 11963 ins_pipe( pipe_slow );
11943 11964 %}
11944 11965
11945 11966 // Convert a float in xmm to an int reg.
11946 11967 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11947 11968 predicate(UseSSE>=1);
11948 11969 match(Set dst (ConvF2I src));
11949 11970 effect( KILL tmp, KILL cr );
11950 11971 format %{ "CVTTSS2SI $dst, $src\n\t"
11951 11972 "CMP $dst,0x80000000\n\t"
11952 11973 "JNE,s fast\n\t"
11953 11974 "SUB ESP, 4\n\t"
11954 11975 "MOVSS [ESP], $src\n\t"
11955 11976 "FLD [ESP]\n\t"
11956 11977 "ADD ESP, 4\n\t"
11957 11978 "CALL d2i_wrapper\n"
11958 11979 "fast:" %}
11959 11980 opcode(0x0); // single-precision conversion
11960 11981 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11961 11982 ins_pipe( pipe_slow );
11962 11983 %}
11963 11984
11964 11985 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11965 11986 predicate(UseSSE==0);
11966 11987 match(Set dst (ConvF2L src));
11967 11988 effect( KILL cr );
11968 11989 format %{ "FLD $src\t# Convert float to long\n\t"
11969 11990 "FLDCW trunc mode\n\t"
11970 11991 "SUB ESP,8\n\t"
11971 11992 "FISTp [ESP + #0]\n\t"
11972 11993 "FLDCW std/24-bit mode\n\t"
11973 11994 "POP EAX\n\t"
11974 11995 "POP EDX\n\t"
11975 11996 "CMP EDX,0x80000000\n\t"
11976 11997 "JNE,s fast\n\t"
11977 11998 "TEST EAX,EAX\n\t"
11978 11999 "JNE,s fast\n\t"
11979 12000 "FLD $src\n\t"
11980 12001 "CALL d2l_wrapper\n"
11981 12002 "fast:" %}
11982 12003 // D2L_encoding works for F2L
11983 12004 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11984 12005 ins_pipe( pipe_slow );
11985 12006 %}
11986 12007
11987 12008 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11988 12009 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11989 12010 predicate (UseSSE>=1);
11990 12011 match(Set dst (ConvF2L src));
11991 12012 effect( KILL cr );
11992 12013 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11993 12014 "MOVSS [ESP],$src\n\t"
11994 12015 "FLD_S [ESP]\n\t"
11995 12016 "FLDCW trunc mode\n\t"
11996 12017 "FISTp [ESP + #0]\n\t"
11997 12018 "FLDCW std/24-bit mode\n\t"
11998 12019 "POP EAX\n\t"
11999 12020 "POP EDX\n\t"
12000 12021 "CMP EDX,0x80000000\n\t"
12001 12022 "JNE,s fast\n\t"
12002 12023 "TEST EAX,EAX\n\t"
12003 12024 "JNE,s fast\n\t"
12004 12025 "SUB ESP,4\t# Convert float to long\n\t"
12005 12026 "MOVSS [ESP],$src\n\t"
12006 12027 "FLD_S [ESP]\n\t"
12007 12028 "ADD ESP,4\n\t"
12008 12029 "CALL d2l_wrapper\n"
12009 12030 "fast:" %}
12010 12031 ins_encode( X2L_encoding(src) );
12011 12032 ins_pipe( pipe_slow );
12012 12033 %}
12013 12034
12014 12035 instruct convI2D_reg(regD dst, stackSlotI src) %{
12015 12036 predicate( UseSSE<=1 );
12016 12037 match(Set dst (ConvI2D src));
12017 12038 format %{ "FILD $src\n\t"
12018 12039 "FSTP $dst" %}
12019 12040 opcode(0xDB, 0x0); /* DB /0 */
12020 12041 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
12021 12042 ins_pipe( fpu_reg_mem );
12022 12043 %}
12023 12044
12024 12045 instruct convI2XD_reg(regXD dst, eRegI src) %{
12025 12046 predicate( UseSSE>=2 && !UseXmmI2D );
12026 12047 match(Set dst (ConvI2D src));
12027 12048 format %{ "CVTSI2SD $dst,$src" %}
12028 12049 opcode(0xF2, 0x0F, 0x2A);
12029 12050 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12030 12051 ins_pipe( pipe_slow );
12031 12052 %}
12032 12053
12033 12054 instruct convI2XD_mem(regXD dst, memory mem) %{
12034 12055 predicate( UseSSE>=2 );
12035 12056 match(Set dst (ConvI2D (LoadI mem)));
12036 12057 format %{ "CVTSI2SD $dst,$mem" %}
12037 12058 opcode(0xF2, 0x0F, 0x2A);
12038 12059 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
12039 12060 ins_pipe( pipe_slow );
12040 12061 %}
12041 12062
12042 12063 instruct convXI2XD_reg(regXD dst, eRegI src)
12043 12064 %{
12044 12065 predicate( UseSSE>=2 && UseXmmI2D );
12045 12066 match(Set dst (ConvI2D src));
12046 12067
12047 12068 format %{ "MOVD $dst,$src\n\t"
12048 12069 "CVTDQ2PD $dst,$dst\t# i2d" %}
12049 12070 ins_encode %{
12050 12071 __ movdl($dst$$XMMRegister, $src$$Register);
12051 12072 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12052 12073 %}
12053 12074 ins_pipe(pipe_slow); // XXX
12054 12075 %}
12055 12076
12056 12077 instruct convI2D_mem(regD dst, memory mem) %{
12057 12078 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12058 12079 match(Set dst (ConvI2D (LoadI mem)));
12059 12080 format %{ "FILD $mem\n\t"
12060 12081 "FSTP $dst" %}
12061 12082 opcode(0xDB); /* DB /0 */
12062 12083 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12063 12084 Pop_Reg_D(dst));
12064 12085 ins_pipe( fpu_reg_mem );
12065 12086 %}
12066 12087
12067 12088 // Convert a byte to a float; no rounding step needed.
12068 12089 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
12069 12090 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12070 12091 match(Set dst (ConvI2F src));
12071 12092 format %{ "FILD $src\n\t"
12072 12093 "FSTP $dst" %}
12073 12094
12074 12095 opcode(0xDB, 0x0); /* DB /0 */
12075 12096 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
12076 12097 ins_pipe( fpu_reg_mem );
12077 12098 %}
12078 12099
12079 12100 // In 24-bit mode, force exponent rounding by storing back out
12080 12101 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
12081 12102 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12082 12103 match(Set dst (ConvI2F src));
12083 12104 ins_cost(200);
12084 12105 format %{ "FILD $src\n\t"
12085 12106 "FSTP_S $dst" %}
12086 12107 opcode(0xDB, 0x0); /* DB /0 */
12087 12108 ins_encode( Push_Mem_I(src),
12088 12109 Pop_Mem_F(dst));
12089 12110 ins_pipe( fpu_mem_mem );
12090 12111 %}
12091 12112
12092 12113 // In 24-bit mode, force exponent rounding by storing back out
12093 12114 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
12094 12115 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12095 12116 match(Set dst (ConvI2F (LoadI mem)));
12096 12117 ins_cost(200);
12097 12118 format %{ "FILD $mem\n\t"
12098 12119 "FSTP_S $dst" %}
12099 12120 opcode(0xDB); /* DB /0 */
12100 12121 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12101 12122 Pop_Mem_F(dst));
12102 12123 ins_pipe( fpu_mem_mem );
12103 12124 %}
12104 12125
12105 12126 // This instruction does not round to 24-bits
12106 12127 instruct convI2F_reg(regF dst, stackSlotI src) %{
12107 12128 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12108 12129 match(Set dst (ConvI2F src));
12109 12130 format %{ "FILD $src\n\t"
12110 12131 "FSTP $dst" %}
12111 12132 opcode(0xDB, 0x0); /* DB /0 */
12112 12133 ins_encode( Push_Mem_I(src),
12113 12134 Pop_Reg_F(dst));
12114 12135 ins_pipe( fpu_reg_mem );
12115 12136 %}
12116 12137
12117 12138 // This instruction does not round to 24-bits
12118 12139 instruct convI2F_mem(regF dst, memory mem) %{
12119 12140 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12120 12141 match(Set dst (ConvI2F (LoadI mem)));
12121 12142 format %{ "FILD $mem\n\t"
12122 12143 "FSTP $dst" %}
12123 12144 opcode(0xDB); /* DB /0 */
12124 12145 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12125 12146 Pop_Reg_F(dst));
12126 12147 ins_pipe( fpu_reg_mem );
12127 12148 %}
12128 12149
12129 12150 // Convert an int to a float in xmm; no rounding step needed.
12130 12151 instruct convI2X_reg(regX dst, eRegI src) %{
12131 12152 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
12132 12153 match(Set dst (ConvI2F src));
12133 12154 format %{ "CVTSI2SS $dst, $src" %}
12134 12155
12135 12156 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
12136 12157 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12137 12158 ins_pipe( pipe_slow );
12138 12159 %}
12139 12160
12140 12161 instruct convXI2X_reg(regX dst, eRegI src)
12141 12162 %{
12142 12163 predicate( UseSSE>=2 && UseXmmI2F );
12143 12164 match(Set dst (ConvI2F src));
12144 12165
12145 12166 format %{ "MOVD $dst,$src\n\t"
12146 12167 "CVTDQ2PS $dst,$dst\t# i2f" %}
12147 12168 ins_encode %{
12148 12169 __ movdl($dst$$XMMRegister, $src$$Register);
12149 12170 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
12150 12171 %}
12151 12172 ins_pipe(pipe_slow); // XXX
12152 12173 %}
12153 12174
12154 12175 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12155 12176 match(Set dst (ConvI2L src));
12156 12177 effect(KILL cr);
12157 12178 ins_cost(375);
12158 12179 format %{ "MOV $dst.lo,$src\n\t"
12159 12180 "MOV $dst.hi,$src\n\t"
12160 12181 "SAR $dst.hi,31" %}
12161 12182 ins_encode(convert_int_long(dst,src));
12162 12183 ins_pipe( ialu_reg_reg_long );
12163 12184 %}
12164 12185
12165 12186 // Zero-extend convert int to long
12166 12187 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
12167 12188 match(Set dst (AndL (ConvI2L src) mask) );
12168 12189 effect( KILL flags );
12169 12190 ins_cost(250);
12170 12191 format %{ "MOV $dst.lo,$src\n\t"
12171 12192 "XOR $dst.hi,$dst.hi" %}
12172 12193 opcode(0x33); // XOR
12173 12194 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12174 12195 ins_pipe( ialu_reg_reg_long );
12175 12196 %}
12176 12197
12177 12198 // Zero-extend long
12178 12199 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
12179 12200 match(Set dst (AndL src mask) );
12180 12201 effect( KILL flags );
12181 12202 ins_cost(250);
12182 12203 format %{ "MOV $dst.lo,$src.lo\n\t"
12183 12204 "XOR $dst.hi,$dst.hi\n\t" %}
12184 12205 opcode(0x33); // XOR
12185 12206 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12186 12207 ins_pipe( ialu_reg_reg_long );
12187 12208 %}
12188 12209
12189 12210 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
12190 12211 predicate (UseSSE<=1);
12191 12212 match(Set dst (ConvL2D src));
12192 12213 effect( KILL cr );
12193 12214 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12194 12215 "PUSH $src.lo\n\t"
12195 12216 "FILD ST,[ESP + #0]\n\t"
12196 12217 "ADD ESP,8\n\t"
12197 12218 "FSTP_D $dst\t# D-round" %}
12198 12219 opcode(0xDF, 0x5); /* DF /5 */
12199 12220 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
12200 12221 ins_pipe( pipe_slow );
12201 12222 %}
12202 12223
12203 12224 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
12204 12225 predicate (UseSSE>=2);
12205 12226 match(Set dst (ConvL2D src));
12206 12227 effect( KILL cr );
12207 12228 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12208 12229 "PUSH $src.lo\n\t"
12209 12230 "FILD_D [ESP]\n\t"
12210 12231 "FSTP_D [ESP]\n\t"
12211 12232 "MOVSD $dst,[ESP]\n\t"
12212 12233 "ADD ESP,8" %}
12213 12234 opcode(0xDF, 0x5); /* DF /5 */
12214 12235 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
12215 12236 ins_pipe( pipe_slow );
12216 12237 %}
12217 12238
12218 12239 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
12219 12240 predicate (UseSSE>=1);
12220 12241 match(Set dst (ConvL2F src));
12221 12242 effect( KILL cr );
12222 12243 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12223 12244 "PUSH $src.lo\n\t"
12224 12245 "FILD_D [ESP]\n\t"
12225 12246 "FSTP_S [ESP]\n\t"
12226 12247 "MOVSS $dst,[ESP]\n\t"
12227 12248 "ADD ESP,8" %}
12228 12249 opcode(0xDF, 0x5); /* DF /5 */
12229 12250 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
12230 12251 ins_pipe( pipe_slow );
12231 12252 %}
12232 12253
12233 12254 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
12234 12255 match(Set dst (ConvL2F src));
12235 12256 effect( KILL cr );
12236 12257 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12237 12258 "PUSH $src.lo\n\t"
12238 12259 "FILD ST,[ESP + #0]\n\t"
12239 12260 "ADD ESP,8\n\t"
12240 12261 "FSTP_S $dst\t# F-round" %}
12241 12262 opcode(0xDF, 0x5); /* DF /5 */
12242 12263 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12243 12264 ins_pipe( pipe_slow );
12244 12265 %}
12245 12266
12246 12267 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12247 12268 match(Set dst (ConvL2I src));
12248 12269 effect( DEF dst, USE src );
12249 12270 format %{ "MOV $dst,$src.lo" %}
12250 12271 ins_encode(enc_CopyL_Lo(dst,src));
12251 12272 ins_pipe( ialu_reg_reg );
12252 12273 %}
12253 12274
12254 12275
12255 12276 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12256 12277 match(Set dst (MoveF2I src));
12257 12278 effect( DEF dst, USE src );
12258 12279 ins_cost(100);
12259 12280 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12260 12281 opcode(0x8B);
12261 12282 ins_encode( OpcP, RegMem(dst,src));
12262 12283 ins_pipe( ialu_reg_mem );
12263 12284 %}
12264 12285
12265 12286 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12266 12287 predicate(UseSSE==0);
12267 12288 match(Set dst (MoveF2I src));
12268 12289 effect( DEF dst, USE src );
12269 12290
12270 12291 ins_cost(125);
12271 12292 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12272 12293 ins_encode( Pop_Mem_Reg_F(dst, src) );
12273 12294 ins_pipe( fpu_mem_reg );
12274 12295 %}
12275 12296
12276 12297 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12277 12298 predicate(UseSSE>=1);
12278 12299 match(Set dst (MoveF2I src));
12279 12300 effect( DEF dst, USE src );
12280 12301
12281 12302 ins_cost(95);
12282 12303 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12283 12304 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
12284 12305 ins_pipe( pipe_slow );
12285 12306 %}
12286 12307
12287 12308 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12288 12309 predicate(UseSSE>=2);
12289 12310 match(Set dst (MoveF2I src));
12290 12311 effect( DEF dst, USE src );
12291 12312 ins_cost(85);
12292 12313 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12293 12314 ins_encode( MovX2I_reg(dst, src));
12294 12315 ins_pipe( pipe_slow );
12295 12316 %}
12296 12317
12297 12318 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12298 12319 match(Set dst (MoveI2F src));
12299 12320 effect( DEF dst, USE src );
12300 12321
12301 12322 ins_cost(100);
12302 12323 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12303 12324 opcode(0x89);
12304 12325 ins_encode( OpcPRegSS( dst, src ) );
12305 12326 ins_pipe( ialu_mem_reg );
12306 12327 %}
12307 12328
12308 12329
12309 12330 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12310 12331 predicate(UseSSE==0);
12311 12332 match(Set dst (MoveI2F src));
12312 12333 effect(DEF dst, USE src);
12313 12334
12314 12335 ins_cost(125);
12315 12336 format %{ "FLD_S $src\n\t"
12316 12337 "FSTP $dst\t# MoveI2F_stack_reg" %}
12317 12338 opcode(0xD9); /* D9 /0, FLD m32real */
12318 12339 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12319 12340 Pop_Reg_F(dst) );
12320 12341 ins_pipe( fpu_reg_mem );
12321 12342 %}
12322 12343
12323 12344 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12324 12345 predicate(UseSSE>=1);
12325 12346 match(Set dst (MoveI2F src));
12326 12347 effect( DEF dst, USE src );
12327 12348
12328 12349 ins_cost(95);
12329 12350 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12330 12351 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12331 12352 ins_pipe( pipe_slow );
12332 12353 %}
12333 12354
12334 12355 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12335 12356 predicate(UseSSE>=2);
12336 12357 match(Set dst (MoveI2F src));
12337 12358 effect( DEF dst, USE src );
12338 12359
12339 12360 ins_cost(85);
12340 12361 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12341 12362 ins_encode( MovI2X_reg(dst, src) );
12342 12363 ins_pipe( pipe_slow );
12343 12364 %}
12344 12365
12345 12366 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12346 12367 match(Set dst (MoveD2L src));
12347 12368 effect(DEF dst, USE src);
12348 12369
12349 12370 ins_cost(250);
12350 12371 format %{ "MOV $dst.lo,$src\n\t"
12351 12372 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12352 12373 opcode(0x8B, 0x8B);
12353 12374 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12354 12375 ins_pipe( ialu_mem_long_reg );
12355 12376 %}
12356 12377
12357 12378 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12358 12379 predicate(UseSSE<=1);
12359 12380 match(Set dst (MoveD2L src));
12360 12381 effect(DEF dst, USE src);
12361 12382
12362 12383 ins_cost(125);
12363 12384 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12364 12385 ins_encode( Pop_Mem_Reg_D(dst, src) );
12365 12386 ins_pipe( fpu_mem_reg );
12366 12387 %}
12367 12388
12368 12389 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12369 12390 predicate(UseSSE>=2);
12370 12391 match(Set dst (MoveD2L src));
12371 12392 effect(DEF dst, USE src);
12372 12393 ins_cost(95);
12373 12394
12374 12395 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12375 12396 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12376 12397 ins_pipe( pipe_slow );
12377 12398 %}
12378 12399
12379 12400 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12380 12401 predicate(UseSSE>=2);
12381 12402 match(Set dst (MoveD2L src));
12382 12403 effect(DEF dst, USE src, TEMP tmp);
12383 12404 ins_cost(85);
12384 12405 format %{ "MOVD $dst.lo,$src\n\t"
12385 12406 "PSHUFLW $tmp,$src,0x4E\n\t"
12386 12407 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12387 12408 ins_encode( MovXD2L_reg(dst, src, tmp) );
12388 12409 ins_pipe( pipe_slow );
12389 12410 %}
12390 12411
12391 12412 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12392 12413 match(Set dst (MoveL2D src));
12393 12414 effect(DEF dst, USE src);
12394 12415
12395 12416 ins_cost(200);
12396 12417 format %{ "MOV $dst,$src.lo\n\t"
12397 12418 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12398 12419 opcode(0x89, 0x89);
12399 12420 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12400 12421 ins_pipe( ialu_mem_long_reg );
12401 12422 %}
12402 12423
12403 12424
12404 12425 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12405 12426 predicate(UseSSE<=1);
12406 12427 match(Set dst (MoveL2D src));
12407 12428 effect(DEF dst, USE src);
12408 12429 ins_cost(125);
12409 12430
12410 12431 format %{ "FLD_D $src\n\t"
12411 12432 "FSTP $dst\t# MoveL2D_stack_reg" %}
12412 12433 opcode(0xDD); /* DD /0, FLD m64real */
12413 12434 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12414 12435 Pop_Reg_D(dst) );
12415 12436 ins_pipe( fpu_reg_mem );
12416 12437 %}
12417 12438
12418 12439
12419 12440 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12420 12441 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12421 12442 match(Set dst (MoveL2D src));
12422 12443 effect(DEF dst, USE src);
12423 12444
12424 12445 ins_cost(95);
12425 12446 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12426 12447 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12427 12448 ins_pipe( pipe_slow );
12428 12449 %}
12429 12450
12430 12451 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12431 12452 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12432 12453 match(Set dst (MoveL2D src));
12433 12454 effect(DEF dst, USE src);
12434 12455
12435 12456 ins_cost(95);
12436 12457 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12437 12458 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12438 12459 ins_pipe( pipe_slow );
12439 12460 %}
12440 12461
12441 12462 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12442 12463 predicate(UseSSE>=2);
12443 12464 match(Set dst (MoveL2D src));
12444 12465 effect(TEMP dst, USE src, TEMP tmp);
12445 12466 ins_cost(85);
12446 12467 format %{ "MOVD $dst,$src.lo\n\t"
12447 12468 "MOVD $tmp,$src.hi\n\t"
12448 12469 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12449 12470 ins_encode( MovL2XD_reg(dst, src, tmp) );
12450 12471 ins_pipe( pipe_slow );
12451 12472 %}
12452 12473
12453 12474 // Replicate scalar to packed byte (1 byte) values in xmm
12454 12475 instruct Repl8B_reg(regXD dst, regXD src) %{
12455 12476 predicate(UseSSE>=2);
12456 12477 match(Set dst (Replicate8B src));
12457 12478 format %{ "MOVDQA $dst,$src\n\t"
12458 12479 "PUNPCKLBW $dst,$dst\n\t"
12459 12480 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12460 12481 ins_encode( pshufd_8x8(dst, src));
12461 12482 ins_pipe( pipe_slow );
12462 12483 %}
12463 12484
12464 12485 // Replicate scalar to packed byte (1 byte) values in xmm
12465 12486 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12466 12487 predicate(UseSSE>=2);
12467 12488 match(Set dst (Replicate8B src));
12468 12489 format %{ "MOVD $dst,$src\n\t"
12469 12490 "PUNPCKLBW $dst,$dst\n\t"
12470 12491 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12471 12492 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12472 12493 ins_pipe( pipe_slow );
12473 12494 %}
12474 12495
12475 12496 // Replicate scalar zero to packed byte (1 byte) values in xmm
12476 12497 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12477 12498 predicate(UseSSE>=2);
12478 12499 match(Set dst (Replicate8B zero));
12479 12500 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12480 12501 ins_encode( pxor(dst, dst));
12481 12502 ins_pipe( fpu_reg_reg );
12482 12503 %}
12483 12504
12484 12505 // Replicate scalar to packed shore (2 byte) values in xmm
12485 12506 instruct Repl4S_reg(regXD dst, regXD src) %{
12486 12507 predicate(UseSSE>=2);
12487 12508 match(Set dst (Replicate4S src));
12488 12509 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12489 12510 ins_encode( pshufd_4x16(dst, src));
12490 12511 ins_pipe( fpu_reg_reg );
12491 12512 %}
12492 12513
12493 12514 // Replicate scalar to packed shore (2 byte) values in xmm
12494 12515 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12495 12516 predicate(UseSSE>=2);
12496 12517 match(Set dst (Replicate4S src));
12497 12518 format %{ "MOVD $dst,$src\n\t"
12498 12519 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12499 12520 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12500 12521 ins_pipe( fpu_reg_reg );
12501 12522 %}
12502 12523
12503 12524 // Replicate scalar zero to packed short (2 byte) values in xmm
12504 12525 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12505 12526 predicate(UseSSE>=2);
12506 12527 match(Set dst (Replicate4S zero));
12507 12528 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12508 12529 ins_encode( pxor(dst, dst));
12509 12530 ins_pipe( fpu_reg_reg );
12510 12531 %}
12511 12532
12512 12533 // Replicate scalar to packed char (2 byte) values in xmm
12513 12534 instruct Repl4C_reg(regXD dst, regXD src) %{
12514 12535 predicate(UseSSE>=2);
12515 12536 match(Set dst (Replicate4C src));
12516 12537 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12517 12538 ins_encode( pshufd_4x16(dst, src));
12518 12539 ins_pipe( fpu_reg_reg );
12519 12540 %}
12520 12541
12521 12542 // Replicate scalar to packed char (2 byte) values in xmm
12522 12543 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12523 12544 predicate(UseSSE>=2);
12524 12545 match(Set dst (Replicate4C src));
12525 12546 format %{ "MOVD $dst,$src\n\t"
12526 12547 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12527 12548 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12528 12549 ins_pipe( fpu_reg_reg );
12529 12550 %}
12530 12551
12531 12552 // Replicate scalar zero to packed char (2 byte) values in xmm
12532 12553 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12533 12554 predicate(UseSSE>=2);
12534 12555 match(Set dst (Replicate4C zero));
12535 12556 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12536 12557 ins_encode( pxor(dst, dst));
12537 12558 ins_pipe( fpu_reg_reg );
12538 12559 %}
12539 12560
12540 12561 // Replicate scalar to packed integer (4 byte) values in xmm
12541 12562 instruct Repl2I_reg(regXD dst, regXD src) %{
12542 12563 predicate(UseSSE>=2);
12543 12564 match(Set dst (Replicate2I src));
12544 12565 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12545 12566 ins_encode( pshufd(dst, src, 0x00));
12546 12567 ins_pipe( fpu_reg_reg );
12547 12568 %}
12548 12569
12549 12570 // Replicate scalar to packed integer (4 byte) values in xmm
12550 12571 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12551 12572 predicate(UseSSE>=2);
12552 12573 match(Set dst (Replicate2I src));
12553 12574 format %{ "MOVD $dst,$src\n\t"
12554 12575 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12555 12576 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12556 12577 ins_pipe( fpu_reg_reg );
12557 12578 %}
12558 12579
12559 12580 // Replicate scalar zero to packed integer (2 byte) values in xmm
12560 12581 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12561 12582 predicate(UseSSE>=2);
12562 12583 match(Set dst (Replicate2I zero));
12563 12584 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12564 12585 ins_encode( pxor(dst, dst));
12565 12586 ins_pipe( fpu_reg_reg );
12566 12587 %}
12567 12588
12568 12589 // Replicate scalar to packed single precision floating point values in xmm
12569 12590 instruct Repl2F_reg(regXD dst, regXD src) %{
12570 12591 predicate(UseSSE>=2);
12571 12592 match(Set dst (Replicate2F src));
12572 12593 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12573 12594 ins_encode( pshufd(dst, src, 0xe0));
12574 12595 ins_pipe( fpu_reg_reg );
12575 12596 %}
12576 12597
12577 12598 // Replicate scalar to packed single precision floating point values in xmm
12578 12599 instruct Repl2F_regX(regXD dst, regX src) %{
12579 12600 predicate(UseSSE>=2);
12580 12601 match(Set dst (Replicate2F src));
12581 12602 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12582 12603 ins_encode( pshufd(dst, src, 0xe0));
12583 12604 ins_pipe( fpu_reg_reg );
12584 12605 %}
12585 12606
12586 12607 // Replicate scalar to packed single precision floating point values in xmm
12587 12608 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12588 12609 predicate(UseSSE>=2);
12589 12610 match(Set dst (Replicate2F zero));
12590 12611 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12591 12612 ins_encode( pxor(dst, dst));
12592 12613 ins_pipe( fpu_reg_reg );
12593 12614 %}
12594 12615
12595 12616 // =======================================================================
12596 12617 // fast clearing of an array
12597 12618 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12598 12619 match(Set dummy (ClearArray cnt base));
12599 12620 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12600 12621 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12601 12622 "XOR EAX,EAX\n\t"
12602 12623 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12603 12624 opcode(0,0x4);
12604 12625 ins_encode( Opcode(0xD1), RegOpc(ECX),
12605 12626 OpcRegReg(0x33,EAX,EAX),
12606 12627 Opcode(0xF3), Opcode(0xAB) );
12607 12628 ins_pipe( pipe_slow );
12608 12629 %}
12609 12630
12610 12631 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12611 12632 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12612 12633 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12613 12634 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12614 12635
12615 12636 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12616 12637 ins_encode %{
12617 12638 __ string_compare($str1$$Register, $str2$$Register,
12618 12639 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12619 12640 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12620 12641 %}
12621 12642 ins_pipe( pipe_slow );
12622 12643 %}
12623 12644
12624 12645 // fast string equals
12625 12646 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12626 12647 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12627 12648 match(Set result (StrEquals (Binary str1 str2) cnt));
12628 12649 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12629 12650
12630 12651 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12631 12652 ins_encode %{
12632 12653 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12633 12654 $cnt$$Register, $result$$Register, $tmp3$$Register,
12634 12655 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12635 12656 %}
12636 12657 ins_pipe( pipe_slow );
12637 12658 %}
12638 12659
12639 12660 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12640 12661 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12641 12662 predicate(UseSSE42Intrinsics);
12642 12663 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12643 12664 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12644 12665
12645 12666 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12646 12667 ins_encode %{
12647 12668 __ string_indexof($str1$$Register, $str2$$Register,
12648 12669 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12649 12670 $tmp1$$XMMRegister, $tmp2$$Register);
12650 12671 %}
12651 12672 ins_pipe( pipe_slow );
12652 12673 %}
12653 12674
12654 12675 // fast array equals
12655 12676 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12656 12677 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12657 12678 %{
12658 12679 match(Set result (AryEq ary1 ary2));
12659 12680 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12660 12681 //ins_cost(300);
12661 12682
12662 12683 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12663 12684 ins_encode %{
12664 12685 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12665 12686 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12666 12687 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12667 12688 %}
12668 12689 ins_pipe( pipe_slow );
12669 12690 %}
12670 12691
12671 12692 //----------Control Flow Instructions------------------------------------------
12672 12693 // Signed compare Instructions
12673 12694 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12674 12695 match(Set cr (CmpI op1 op2));
12675 12696 effect( DEF cr, USE op1, USE op2 );
12676 12697 format %{ "CMP $op1,$op2" %}
12677 12698 opcode(0x3B); /* Opcode 3B /r */
12678 12699 ins_encode( OpcP, RegReg( op1, op2) );
12679 12700 ins_pipe( ialu_cr_reg_reg );
12680 12701 %}
12681 12702
12682 12703 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12683 12704 match(Set cr (CmpI op1 op2));
12684 12705 effect( DEF cr, USE op1 );
12685 12706 format %{ "CMP $op1,$op2" %}
12686 12707 opcode(0x81,0x07); /* Opcode 81 /7 */
12687 12708 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12688 12709 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12689 12710 ins_pipe( ialu_cr_reg_imm );
12690 12711 %}
12691 12712
12692 12713 // Cisc-spilled version of cmpI_eReg
12693 12714 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12694 12715 match(Set cr (CmpI op1 (LoadI op2)));
12695 12716
12696 12717 format %{ "CMP $op1,$op2" %}
12697 12718 ins_cost(500);
12698 12719 opcode(0x3B); /* Opcode 3B /r */
12699 12720 ins_encode( OpcP, RegMem( op1, op2) );
12700 12721 ins_pipe( ialu_cr_reg_mem );
12701 12722 %}
12702 12723
12703 12724 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12704 12725 match(Set cr (CmpI src zero));
12705 12726 effect( DEF cr, USE src );
12706 12727
12707 12728 format %{ "TEST $src,$src" %}
12708 12729 opcode(0x85);
12709 12730 ins_encode( OpcP, RegReg( src, src ) );
12710 12731 ins_pipe( ialu_cr_reg_imm );
12711 12732 %}
12712 12733
12713 12734 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12714 12735 match(Set cr (CmpI (AndI src con) zero));
12715 12736
12716 12737 format %{ "TEST $src,$con" %}
12717 12738 opcode(0xF7,0x00);
12718 12739 ins_encode( OpcP, RegOpc(src), Con32(con) );
12719 12740 ins_pipe( ialu_cr_reg_imm );
12720 12741 %}
12721 12742
12722 12743 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12723 12744 match(Set cr (CmpI (AndI src mem) zero));
12724 12745
12725 12746 format %{ "TEST $src,$mem" %}
12726 12747 opcode(0x85);
12727 12748 ins_encode( OpcP, RegMem( src, mem ) );
12728 12749 ins_pipe( ialu_cr_reg_mem );
12729 12750 %}
12730 12751
12731 12752 // Unsigned compare Instructions; really, same as signed except they
12732 12753 // produce an eFlagsRegU instead of eFlagsReg.
12733 12754 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12734 12755 match(Set cr (CmpU op1 op2));
12735 12756
12736 12757 format %{ "CMPu $op1,$op2" %}
12737 12758 opcode(0x3B); /* Opcode 3B /r */
12738 12759 ins_encode( OpcP, RegReg( op1, op2) );
12739 12760 ins_pipe( ialu_cr_reg_reg );
12740 12761 %}
12741 12762
12742 12763 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12743 12764 match(Set cr (CmpU op1 op2));
12744 12765
12745 12766 format %{ "CMPu $op1,$op2" %}
12746 12767 opcode(0x81,0x07); /* Opcode 81 /7 */
12747 12768 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12748 12769 ins_pipe( ialu_cr_reg_imm );
12749 12770 %}
12750 12771
12751 12772 // // Cisc-spilled version of cmpU_eReg
12752 12773 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12753 12774 match(Set cr (CmpU op1 (LoadI op2)));
12754 12775
12755 12776 format %{ "CMPu $op1,$op2" %}
12756 12777 ins_cost(500);
12757 12778 opcode(0x3B); /* Opcode 3B /r */
12758 12779 ins_encode( OpcP, RegMem( op1, op2) );
12759 12780 ins_pipe( ialu_cr_reg_mem );
12760 12781 %}
12761 12782
12762 12783 // // Cisc-spilled version of cmpU_eReg
12763 12784 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12764 12785 // match(Set cr (CmpU (LoadI op1) op2));
12765 12786 //
12766 12787 // format %{ "CMPu $op1,$op2" %}
12767 12788 // ins_cost(500);
12768 12789 // opcode(0x39); /* Opcode 39 /r */
12769 12790 // ins_encode( OpcP, RegMem( op1, op2) );
12770 12791 //%}
12771 12792
12772 12793 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12773 12794 match(Set cr (CmpU src zero));
12774 12795
12775 12796 format %{ "TESTu $src,$src" %}
12776 12797 opcode(0x85);
12777 12798 ins_encode( OpcP, RegReg( src, src ) );
12778 12799 ins_pipe( ialu_cr_reg_imm );
12779 12800 %}
12780 12801
12781 12802 // Unsigned pointer compare Instructions
12782 12803 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12783 12804 match(Set cr (CmpP op1 op2));
12784 12805
12785 12806 format %{ "CMPu $op1,$op2" %}
12786 12807 opcode(0x3B); /* Opcode 3B /r */
12787 12808 ins_encode( OpcP, RegReg( op1, op2) );
12788 12809 ins_pipe( ialu_cr_reg_reg );
12789 12810 %}
12790 12811
12791 12812 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12792 12813 match(Set cr (CmpP op1 op2));
12793 12814
12794 12815 format %{ "CMPu $op1,$op2" %}
12795 12816 opcode(0x81,0x07); /* Opcode 81 /7 */
12796 12817 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12797 12818 ins_pipe( ialu_cr_reg_imm );
12798 12819 %}
12799 12820
12800 12821 // // Cisc-spilled version of cmpP_eReg
12801 12822 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12802 12823 match(Set cr (CmpP op1 (LoadP op2)));
12803 12824
12804 12825 format %{ "CMPu $op1,$op2" %}
12805 12826 ins_cost(500);
12806 12827 opcode(0x3B); /* Opcode 3B /r */
12807 12828 ins_encode( OpcP, RegMem( op1, op2) );
12808 12829 ins_pipe( ialu_cr_reg_mem );
12809 12830 %}
12810 12831
12811 12832 // // Cisc-spilled version of cmpP_eReg
12812 12833 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12813 12834 // match(Set cr (CmpP (LoadP op1) op2));
12814 12835 //
12815 12836 // format %{ "CMPu $op1,$op2" %}
12816 12837 // ins_cost(500);
12817 12838 // opcode(0x39); /* Opcode 39 /r */
12818 12839 // ins_encode( OpcP, RegMem( op1, op2) );
12819 12840 //%}
12820 12841
12821 12842 // Compare raw pointer (used in out-of-heap check).
12822 12843 // Only works because non-oop pointers must be raw pointers
12823 12844 // and raw pointers have no anti-dependencies.
12824 12845 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12825 12846 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12826 12847 match(Set cr (CmpP op1 (LoadP op2)));
12827 12848
12828 12849 format %{ "CMPu $op1,$op2" %}
12829 12850 opcode(0x3B); /* Opcode 3B /r */
12830 12851 ins_encode( OpcP, RegMem( op1, op2) );
12831 12852 ins_pipe( ialu_cr_reg_mem );
12832 12853 %}
12833 12854
12834 12855 //
12835 12856 // This will generate a signed flags result. This should be ok
12836 12857 // since any compare to a zero should be eq/neq.
12837 12858 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12838 12859 match(Set cr (CmpP src zero));
12839 12860
12840 12861 format %{ "TEST $src,$src" %}
12841 12862 opcode(0x85);
12842 12863 ins_encode( OpcP, RegReg( src, src ) );
12843 12864 ins_pipe( ialu_cr_reg_imm );
12844 12865 %}
12845 12866
12846 12867 // Cisc-spilled version of testP_reg
12847 12868 // This will generate a signed flags result. This should be ok
12848 12869 // since any compare to a zero should be eq/neq.
12849 12870 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12850 12871 match(Set cr (CmpP (LoadP op) zero));
12851 12872
12852 12873 format %{ "TEST $op,0xFFFFFFFF" %}
12853 12874 ins_cost(500);
12854 12875 opcode(0xF7); /* Opcode F7 /0 */
12855 12876 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12856 12877 ins_pipe( ialu_cr_reg_imm );
12857 12878 %}
12858 12879
12859 12880 // Yanked all unsigned pointer compare operations.
12860 12881 // Pointer compares are done with CmpP which is already unsigned.
12861 12882
12862 12883 //----------Max and Min--------------------------------------------------------
12863 12884 // Min Instructions
12864 12885 ////
12865 12886 // *** Min and Max using the conditional move are slower than the
12866 12887 // *** branch version on a Pentium III.
12867 12888 // // Conditional move for min
12868 12889 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12869 12890 // effect( USE_DEF op2, USE op1, USE cr );
12870 12891 // format %{ "CMOVlt $op2,$op1\t! min" %}
12871 12892 // opcode(0x4C,0x0F);
12872 12893 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12873 12894 // ins_pipe( pipe_cmov_reg );
12874 12895 //%}
12875 12896 //
12876 12897 //// Min Register with Register (P6 version)
12877 12898 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12878 12899 // predicate(VM_Version::supports_cmov() );
12879 12900 // match(Set op2 (MinI op1 op2));
12880 12901 // ins_cost(200);
12881 12902 // expand %{
12882 12903 // eFlagsReg cr;
12883 12904 // compI_eReg(cr,op1,op2);
12884 12905 // cmovI_reg_lt(op2,op1,cr);
12885 12906 // %}
12886 12907 //%}
12887 12908
12888 12909 // Min Register with Register (generic version)
12889 12910 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12890 12911 match(Set dst (MinI dst src));
12891 12912 effect(KILL flags);
12892 12913 ins_cost(300);
12893 12914
12894 12915 format %{ "MIN $dst,$src" %}
12895 12916 opcode(0xCC);
12896 12917 ins_encode( min_enc(dst,src) );
12897 12918 ins_pipe( pipe_slow );
12898 12919 %}
12899 12920
12900 12921 // Max Register with Register
12901 12922 // *** Min and Max using the conditional move are slower than the
12902 12923 // *** branch version on a Pentium III.
12903 12924 // // Conditional move for max
12904 12925 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12905 12926 // effect( USE_DEF op2, USE op1, USE cr );
12906 12927 // format %{ "CMOVgt $op2,$op1\t! max" %}
12907 12928 // opcode(0x4F,0x0F);
12908 12929 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12909 12930 // ins_pipe( pipe_cmov_reg );
12910 12931 //%}
12911 12932 //
12912 12933 // // Max Register with Register (P6 version)
12913 12934 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12914 12935 // predicate(VM_Version::supports_cmov() );
12915 12936 // match(Set op2 (MaxI op1 op2));
12916 12937 // ins_cost(200);
12917 12938 // expand %{
12918 12939 // eFlagsReg cr;
12919 12940 // compI_eReg(cr,op1,op2);
12920 12941 // cmovI_reg_gt(op2,op1,cr);
12921 12942 // %}
12922 12943 //%}
12923 12944
12924 12945 // Max Register with Register (generic version)
12925 12946 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12926 12947 match(Set dst (MaxI dst src));
12927 12948 effect(KILL flags);
12928 12949 ins_cost(300);
12929 12950
12930 12951 format %{ "MAX $dst,$src" %}
12931 12952 opcode(0xCC);
↓ open down ↓ |
1337 lines elided |
↑ open up ↑ |
12932 12953 ins_encode( max_enc(dst,src) );
12933 12954 ins_pipe( pipe_slow );
12934 12955 %}
12935 12956
12936 12957 // ============================================================================
12937 12958 // Branch Instructions
12938 12959 // Jump Table
12939 12960 instruct jumpXtnd(eRegI switch_val) %{
12940 12961 match(Jump switch_val);
12941 12962 ins_cost(350);
12942 -
12943 - format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12944 -
12963 + format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12945 12964 ins_encode %{
12946 - address table_base = __ address_table_constant(_index2label);
12947 -
12948 12965 // Jump to Address(table_base + switch_reg)
12949 - InternalAddress table(table_base);
12950 12966 Address index(noreg, $switch_val$$Register, Address::times_1);
12951 - __ jump(ArrayAddress(table, index));
12967 + __ jump(ArrayAddress($constantaddress, index));
12952 12968 %}
12953 12969 ins_pc_relative(1);
12954 12970 ins_pipe(pipe_jmp);
12955 12971 %}
12956 12972
12957 12973 // Jump Direct - Label defines a relative address from JMP+1
12958 12974 instruct jmpDir(label labl) %{
12959 12975 match(Goto);
12960 12976 effect(USE labl);
12961 12977
12962 12978 ins_cost(300);
12963 12979 format %{ "JMP $labl" %}
12964 12980 size(5);
12965 12981 opcode(0xE9);
12966 12982 ins_encode( OpcP, Lbl( labl ) );
12967 12983 ins_pipe( pipe_jmp );
12968 12984 ins_pc_relative(1);
12969 12985 %}
12970 12986
12971 12987 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12972 12988 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12973 12989 match(If cop cr);
12974 12990 effect(USE labl);
12975 12991
12976 12992 ins_cost(300);
12977 12993 format %{ "J$cop $labl" %}
12978 12994 size(6);
12979 12995 opcode(0x0F, 0x80);
12980 12996 ins_encode( Jcc( cop, labl) );
12981 12997 ins_pipe( pipe_jcc );
12982 12998 ins_pc_relative(1);
12983 12999 %}
12984 13000
12985 13001 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12986 13002 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12987 13003 match(CountedLoopEnd cop cr);
12988 13004 effect(USE labl);
12989 13005
12990 13006 ins_cost(300);
12991 13007 format %{ "J$cop $labl\t# Loop end" %}
12992 13008 size(6);
12993 13009 opcode(0x0F, 0x80);
12994 13010 ins_encode( Jcc( cop, labl) );
12995 13011 ins_pipe( pipe_jcc );
12996 13012 ins_pc_relative(1);
12997 13013 %}
12998 13014
12999 13015 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13000 13016 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13001 13017 match(CountedLoopEnd cop cmp);
13002 13018 effect(USE labl);
13003 13019
13004 13020 ins_cost(300);
13005 13021 format %{ "J$cop,u $labl\t# Loop end" %}
13006 13022 size(6);
13007 13023 opcode(0x0F, 0x80);
13008 13024 ins_encode( Jcc( cop, labl) );
13009 13025 ins_pipe( pipe_jcc );
13010 13026 ins_pc_relative(1);
13011 13027 %}
13012 13028
13013 13029 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13014 13030 match(CountedLoopEnd cop cmp);
13015 13031 effect(USE labl);
13016 13032
13017 13033 ins_cost(200);
13018 13034 format %{ "J$cop,u $labl\t# Loop end" %}
13019 13035 size(6);
13020 13036 opcode(0x0F, 0x80);
13021 13037 ins_encode( Jcc( cop, labl) );
13022 13038 ins_pipe( pipe_jcc );
13023 13039 ins_pc_relative(1);
13024 13040 %}
13025 13041
13026 13042 // Jump Direct Conditional - using unsigned comparison
13027 13043 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13028 13044 match(If cop cmp);
13029 13045 effect(USE labl);
13030 13046
13031 13047 ins_cost(300);
13032 13048 format %{ "J$cop,u $labl" %}
13033 13049 size(6);
13034 13050 opcode(0x0F, 0x80);
13035 13051 ins_encode(Jcc(cop, labl));
13036 13052 ins_pipe(pipe_jcc);
13037 13053 ins_pc_relative(1);
13038 13054 %}
13039 13055
13040 13056 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13041 13057 match(If cop cmp);
13042 13058 effect(USE labl);
13043 13059
13044 13060 ins_cost(200);
13045 13061 format %{ "J$cop,u $labl" %}
13046 13062 size(6);
13047 13063 opcode(0x0F, 0x80);
13048 13064 ins_encode(Jcc(cop, labl));
13049 13065 ins_pipe(pipe_jcc);
13050 13066 ins_pc_relative(1);
13051 13067 %}
13052 13068
13053 13069 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13054 13070 match(If cop cmp);
13055 13071 effect(USE labl);
13056 13072
13057 13073 ins_cost(200);
13058 13074 format %{ $$template
13059 13075 if ($cop$$cmpcode == Assembler::notEqual) {
13060 13076 $$emit$$"JP,u $labl\n\t"
13061 13077 $$emit$$"J$cop,u $labl"
13062 13078 } else {
13063 13079 $$emit$$"JP,u done\n\t"
13064 13080 $$emit$$"J$cop,u $labl\n\t"
13065 13081 $$emit$$"done:"
13066 13082 }
13067 13083 %}
13068 13084 size(12);
13069 13085 opcode(0x0F, 0x80);
13070 13086 ins_encode %{
13071 13087 Label* l = $labl$$label;
13072 13088 $$$emit8$primary;
13073 13089 emit_cc(cbuf, $secondary, Assembler::parity);
13074 13090 int parity_disp = -1;
13075 13091 bool ok = false;
13076 13092 if ($cop$$cmpcode == Assembler::notEqual) {
13077 13093 // the two jumps 6 bytes apart so the jump distances are too
13078 13094 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13079 13095 } else if ($cop$$cmpcode == Assembler::equal) {
13080 13096 parity_disp = 6;
13081 13097 ok = true;
13082 13098 } else {
13083 13099 ShouldNotReachHere();
13084 13100 }
13085 13101 emit_d32(cbuf, parity_disp);
13086 13102 $$$emit8$primary;
13087 13103 emit_cc(cbuf, $secondary, $cop$$cmpcode);
13088 13104 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13089 13105 emit_d32(cbuf, disp);
13090 13106 %}
13091 13107 ins_pipe(pipe_jcc);
13092 13108 ins_pc_relative(1);
13093 13109 %}
13094 13110
13095 13111 // ============================================================================
13096 13112 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
13097 13113 // array for an instance of the superklass. Set a hidden internal cache on a
13098 13114 // hit (cache is checked with exposed code in gen_subtype_check()). Return
13099 13115 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
13100 13116 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13101 13117 match(Set result (PartialSubtypeCheck sub super));
13102 13118 effect( KILL rcx, KILL cr );
13103 13119
13104 13120 ins_cost(1100); // slightly larger than the next version
13105 13121 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13106 13122 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13107 13123 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13108 13124 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13109 13125 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
13110 13126 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13111 13127 "XOR $result,$result\t\t Hit: EDI zero\n\t"
13112 13128 "miss:\t" %}
13113 13129
13114 13130 opcode(0x1); // Force a XOR of EDI
13115 13131 ins_encode( enc_PartialSubtypeCheck() );
13116 13132 ins_pipe( pipe_slow );
13117 13133 %}
13118 13134
13119 13135 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13120 13136 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13121 13137 effect( KILL rcx, KILL result );
13122 13138
13123 13139 ins_cost(1000);
13124 13140 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13125 13141 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13126 13142 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13127 13143 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13128 13144 "JNE,s miss\t\t# Missed: flags NZ\n\t"
13129 13145 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13130 13146 "miss:\t" %}
13131 13147
13132 13148 opcode(0x0); // No need to XOR EDI
13133 13149 ins_encode( enc_PartialSubtypeCheck() );
13134 13150 ins_pipe( pipe_slow );
13135 13151 %}
13136 13152
13137 13153 // ============================================================================
13138 13154 // Branch Instructions -- short offset versions
13139 13155 //
13140 13156 // These instructions are used to replace jumps of a long offset (the default
13141 13157 // match) with jumps of a shorter offset. These instructions are all tagged
13142 13158 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13143 13159 // match rules in general matching. Instead, the ADLC generates a conversion
13144 13160 // method in the MachNode which can be used to do in-place replacement of the
13145 13161 // long variant with the shorter variant. The compiler will determine if a
13146 13162 // branch can be taken by the is_short_branch_offset() predicate in the machine
13147 13163 // specific code section of the file.
13148 13164
13149 13165 // Jump Direct - Label defines a relative address from JMP+1
13150 13166 instruct jmpDir_short(label labl) %{
13151 13167 match(Goto);
13152 13168 effect(USE labl);
13153 13169
13154 13170 ins_cost(300);
13155 13171 format %{ "JMP,s $labl" %}
13156 13172 size(2);
13157 13173 opcode(0xEB);
13158 13174 ins_encode( OpcP, LblShort( labl ) );
13159 13175 ins_pipe( pipe_jmp );
13160 13176 ins_pc_relative(1);
13161 13177 ins_short_branch(1);
13162 13178 %}
13163 13179
13164 13180 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13165 13181 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13166 13182 match(If cop cr);
13167 13183 effect(USE labl);
13168 13184
13169 13185 ins_cost(300);
13170 13186 format %{ "J$cop,s $labl" %}
13171 13187 size(2);
13172 13188 opcode(0x70);
13173 13189 ins_encode( JccShort( cop, labl) );
13174 13190 ins_pipe( pipe_jcc );
13175 13191 ins_pc_relative(1);
13176 13192 ins_short_branch(1);
13177 13193 %}
13178 13194
13179 13195 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13180 13196 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13181 13197 match(CountedLoopEnd cop cr);
13182 13198 effect(USE labl);
13183 13199
13184 13200 ins_cost(300);
13185 13201 format %{ "J$cop,s $labl\t# Loop end" %}
13186 13202 size(2);
13187 13203 opcode(0x70);
13188 13204 ins_encode( JccShort( cop, labl) );
13189 13205 ins_pipe( pipe_jcc );
13190 13206 ins_pc_relative(1);
13191 13207 ins_short_branch(1);
13192 13208 %}
13193 13209
13194 13210 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13195 13211 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13196 13212 match(CountedLoopEnd cop cmp);
13197 13213 effect(USE labl);
13198 13214
13199 13215 ins_cost(300);
13200 13216 format %{ "J$cop,us $labl\t# Loop end" %}
13201 13217 size(2);
13202 13218 opcode(0x70);
13203 13219 ins_encode( JccShort( cop, labl) );
13204 13220 ins_pipe( pipe_jcc );
13205 13221 ins_pc_relative(1);
13206 13222 ins_short_branch(1);
13207 13223 %}
13208 13224
13209 13225 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13210 13226 match(CountedLoopEnd cop cmp);
13211 13227 effect(USE labl);
13212 13228
13213 13229 ins_cost(300);
13214 13230 format %{ "J$cop,us $labl\t# Loop end" %}
13215 13231 size(2);
13216 13232 opcode(0x70);
13217 13233 ins_encode( JccShort( cop, labl) );
13218 13234 ins_pipe( pipe_jcc );
13219 13235 ins_pc_relative(1);
13220 13236 ins_short_branch(1);
13221 13237 %}
13222 13238
13223 13239 // Jump Direct Conditional - using unsigned comparison
13224 13240 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13225 13241 match(If cop cmp);
13226 13242 effect(USE labl);
13227 13243
13228 13244 ins_cost(300);
13229 13245 format %{ "J$cop,us $labl" %}
13230 13246 size(2);
13231 13247 opcode(0x70);
13232 13248 ins_encode( JccShort( cop, labl) );
13233 13249 ins_pipe( pipe_jcc );
13234 13250 ins_pc_relative(1);
13235 13251 ins_short_branch(1);
13236 13252 %}
13237 13253
13238 13254 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13239 13255 match(If cop cmp);
13240 13256 effect(USE labl);
13241 13257
13242 13258 ins_cost(300);
13243 13259 format %{ "J$cop,us $labl" %}
13244 13260 size(2);
13245 13261 opcode(0x70);
13246 13262 ins_encode( JccShort( cop, labl) );
13247 13263 ins_pipe( pipe_jcc );
13248 13264 ins_pc_relative(1);
13249 13265 ins_short_branch(1);
13250 13266 %}
13251 13267
13252 13268 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13253 13269 match(If cop cmp);
13254 13270 effect(USE labl);
13255 13271
13256 13272 ins_cost(300);
13257 13273 format %{ $$template
13258 13274 if ($cop$$cmpcode == Assembler::notEqual) {
13259 13275 $$emit$$"JP,u,s $labl\n\t"
13260 13276 $$emit$$"J$cop,u,s $labl"
13261 13277 } else {
13262 13278 $$emit$$"JP,u,s done\n\t"
13263 13279 $$emit$$"J$cop,u,s $labl\n\t"
13264 13280 $$emit$$"done:"
13265 13281 }
13266 13282 %}
13267 13283 size(4);
13268 13284 opcode(0x70);
13269 13285 ins_encode %{
13270 13286 Label* l = $labl$$label;
13271 13287 emit_cc(cbuf, $primary, Assembler::parity);
13272 13288 int parity_disp = -1;
13273 13289 if ($cop$$cmpcode == Assembler::notEqual) {
13274 13290 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13275 13291 } else if ($cop$$cmpcode == Assembler::equal) {
13276 13292 parity_disp = 2;
13277 13293 } else {
13278 13294 ShouldNotReachHere();
13279 13295 }
13280 13296 emit_d8(cbuf, parity_disp);
13281 13297 emit_cc(cbuf, $primary, $cop$$cmpcode);
13282 13298 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13283 13299 emit_d8(cbuf, disp);
13284 13300 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
13285 13301 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
13286 13302 %}
13287 13303 ins_pipe(pipe_jcc);
13288 13304 ins_pc_relative(1);
13289 13305 ins_short_branch(1);
13290 13306 %}
13291 13307
13292 13308 // ============================================================================
13293 13309 // Long Compare
13294 13310 //
13295 13311 // Currently we hold longs in 2 registers. Comparing such values efficiently
13296 13312 // is tricky. The flavor of compare used depends on whether we are testing
13297 13313 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
13298 13314 // The GE test is the negated LT test. The LE test can be had by commuting
13299 13315 // the operands (yielding a GE test) and then negating; negate again for the
13300 13316 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
13301 13317 // NE test is negated from that.
13302 13318
13303 13319 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13304 13320 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
13305 13321 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
13306 13322 // are collapsed internally in the ADLC's dfa-gen code. The match for
13307 13323 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13308 13324 // foo match ends up with the wrong leaf. One fix is to not match both
13309 13325 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
13310 13326 // both forms beat the trinary form of long-compare and both are very useful
13311 13327 // on Intel which has so few registers.
13312 13328
13313 13329 // Manifest a CmpL result in an integer register. Very painful.
13314 13330 // This is the test to avoid.
13315 13331 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13316 13332 match(Set dst (CmpL3 src1 src2));
13317 13333 effect( KILL flags );
13318 13334 ins_cost(1000);
13319 13335 format %{ "XOR $dst,$dst\n\t"
13320 13336 "CMP $src1.hi,$src2.hi\n\t"
13321 13337 "JLT,s m_one\n\t"
13322 13338 "JGT,s p_one\n\t"
13323 13339 "CMP $src1.lo,$src2.lo\n\t"
13324 13340 "JB,s m_one\n\t"
13325 13341 "JEQ,s done\n"
13326 13342 "p_one:\tINC $dst\n\t"
13327 13343 "JMP,s done\n"
13328 13344 "m_one:\tDEC $dst\n"
13329 13345 "done:" %}
13330 13346 ins_encode %{
13331 13347 Label p_one, m_one, done;
13332 13348 __ xorptr($dst$$Register, $dst$$Register);
13333 13349 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13334 13350 __ jccb(Assembler::less, m_one);
13335 13351 __ jccb(Assembler::greater, p_one);
13336 13352 __ cmpl($src1$$Register, $src2$$Register);
13337 13353 __ jccb(Assembler::below, m_one);
13338 13354 __ jccb(Assembler::equal, done);
13339 13355 __ bind(p_one);
13340 13356 __ incrementl($dst$$Register);
13341 13357 __ jmpb(done);
13342 13358 __ bind(m_one);
13343 13359 __ decrementl($dst$$Register);
13344 13360 __ bind(done);
13345 13361 %}
13346 13362 ins_pipe( pipe_slow );
13347 13363 %}
13348 13364
13349 13365 //======
13350 13366 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13351 13367 // compares. Can be used for LE or GT compares by reversing arguments.
13352 13368 // NOT GOOD FOR EQ/NE tests.
13353 13369 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13354 13370 match( Set flags (CmpL src zero ));
13355 13371 ins_cost(100);
13356 13372 format %{ "TEST $src.hi,$src.hi" %}
13357 13373 opcode(0x85);
13358 13374 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13359 13375 ins_pipe( ialu_cr_reg_reg );
13360 13376 %}
13361 13377
13362 13378 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13363 13379 // compares. Can be used for LE or GT compares by reversing arguments.
13364 13380 // NOT GOOD FOR EQ/NE tests.
13365 13381 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13366 13382 match( Set flags (CmpL src1 src2 ));
13367 13383 effect( TEMP tmp );
13368 13384 ins_cost(300);
13369 13385 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13370 13386 "MOV $tmp,$src1.hi\n\t"
13371 13387 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13372 13388 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13373 13389 ins_pipe( ialu_cr_reg_reg );
13374 13390 %}
13375 13391
13376 13392 // Long compares reg < zero/req OR reg >= zero/req.
13377 13393 // Just a wrapper for a normal branch, plus the predicate test.
13378 13394 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13379 13395 match(If cmp flags);
13380 13396 effect(USE labl);
13381 13397 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13382 13398 expand %{
13383 13399 jmpCon(cmp,flags,labl); // JLT or JGE...
13384 13400 %}
13385 13401 %}
13386 13402
13387 13403 // Compare 2 longs and CMOVE longs.
13388 13404 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13389 13405 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13390 13406 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13391 13407 ins_cost(400);
13392 13408 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13393 13409 "CMOV$cmp $dst.hi,$src.hi" %}
13394 13410 opcode(0x0F,0x40);
13395 13411 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13396 13412 ins_pipe( pipe_cmov_reg_long );
13397 13413 %}
13398 13414
13399 13415 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13400 13416 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13401 13417 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13402 13418 ins_cost(500);
13403 13419 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13404 13420 "CMOV$cmp $dst.hi,$src.hi" %}
13405 13421 opcode(0x0F,0x40);
13406 13422 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13407 13423 ins_pipe( pipe_cmov_reg_long );
13408 13424 %}
13409 13425
13410 13426 // Compare 2 longs and CMOVE ints.
13411 13427 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13412 13428 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13413 13429 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13414 13430 ins_cost(200);
13415 13431 format %{ "CMOV$cmp $dst,$src" %}
13416 13432 opcode(0x0F,0x40);
13417 13433 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13418 13434 ins_pipe( pipe_cmov_reg );
13419 13435 %}
13420 13436
13421 13437 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13422 13438 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13423 13439 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13424 13440 ins_cost(250);
13425 13441 format %{ "CMOV$cmp $dst,$src" %}
13426 13442 opcode(0x0F,0x40);
13427 13443 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13428 13444 ins_pipe( pipe_cmov_mem );
13429 13445 %}
13430 13446
13431 13447 // Compare 2 longs and CMOVE ints.
13432 13448 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13433 13449 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13434 13450 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13435 13451 ins_cost(200);
13436 13452 format %{ "CMOV$cmp $dst,$src" %}
13437 13453 opcode(0x0F,0x40);
13438 13454 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13439 13455 ins_pipe( pipe_cmov_reg );
13440 13456 %}
13441 13457
13442 13458 // Compare 2 longs and CMOVE doubles
13443 13459 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13444 13460 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13445 13461 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13446 13462 ins_cost(200);
13447 13463 expand %{
13448 13464 fcmovD_regS(cmp,flags,dst,src);
13449 13465 %}
13450 13466 %}
13451 13467
13452 13468 // Compare 2 longs and CMOVE doubles
13453 13469 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
13454 13470 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13455 13471 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13456 13472 ins_cost(200);
13457 13473 expand %{
13458 13474 fcmovXD_regS(cmp,flags,dst,src);
13459 13475 %}
13460 13476 %}
13461 13477
13462 13478 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13463 13479 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13464 13480 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13465 13481 ins_cost(200);
13466 13482 expand %{
13467 13483 fcmovF_regS(cmp,flags,dst,src);
13468 13484 %}
13469 13485 %}
13470 13486
13471 13487 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
13472 13488 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13473 13489 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13474 13490 ins_cost(200);
13475 13491 expand %{
13476 13492 fcmovX_regS(cmp,flags,dst,src);
13477 13493 %}
13478 13494 %}
13479 13495
13480 13496 //======
13481 13497 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13482 13498 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13483 13499 match( Set flags (CmpL src zero ));
13484 13500 effect(TEMP tmp);
13485 13501 ins_cost(200);
13486 13502 format %{ "MOV $tmp,$src.lo\n\t"
13487 13503 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13488 13504 ins_encode( long_cmp_flags0( src, tmp ) );
13489 13505 ins_pipe( ialu_reg_reg_long );
13490 13506 %}
13491 13507
13492 13508 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13493 13509 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13494 13510 match( Set flags (CmpL src1 src2 ));
13495 13511 ins_cost(200+300);
13496 13512 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13497 13513 "JNE,s skip\n\t"
13498 13514 "CMP $src1.hi,$src2.hi\n\t"
13499 13515 "skip:\t" %}
13500 13516 ins_encode( long_cmp_flags1( src1, src2 ) );
13501 13517 ins_pipe( ialu_cr_reg_reg );
13502 13518 %}
13503 13519
13504 13520 // Long compare reg == zero/reg OR reg != zero/reg
13505 13521 // Just a wrapper for a normal branch, plus the predicate test.
13506 13522 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13507 13523 match(If cmp flags);
13508 13524 effect(USE labl);
13509 13525 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13510 13526 expand %{
13511 13527 jmpCon(cmp,flags,labl); // JEQ or JNE...
13512 13528 %}
13513 13529 %}
13514 13530
13515 13531 // Compare 2 longs and CMOVE longs.
13516 13532 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13517 13533 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13518 13534 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13519 13535 ins_cost(400);
13520 13536 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13521 13537 "CMOV$cmp $dst.hi,$src.hi" %}
13522 13538 opcode(0x0F,0x40);
13523 13539 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13524 13540 ins_pipe( pipe_cmov_reg_long );
13525 13541 %}
13526 13542
13527 13543 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13528 13544 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13529 13545 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13530 13546 ins_cost(500);
13531 13547 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13532 13548 "CMOV$cmp $dst.hi,$src.hi" %}
13533 13549 opcode(0x0F,0x40);
13534 13550 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13535 13551 ins_pipe( pipe_cmov_reg_long );
13536 13552 %}
13537 13553
13538 13554 // Compare 2 longs and CMOVE ints.
13539 13555 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13540 13556 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13541 13557 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13542 13558 ins_cost(200);
13543 13559 format %{ "CMOV$cmp $dst,$src" %}
13544 13560 opcode(0x0F,0x40);
13545 13561 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13546 13562 ins_pipe( pipe_cmov_reg );
13547 13563 %}
13548 13564
13549 13565 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13550 13566 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13551 13567 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13552 13568 ins_cost(250);
13553 13569 format %{ "CMOV$cmp $dst,$src" %}
13554 13570 opcode(0x0F,0x40);
13555 13571 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13556 13572 ins_pipe( pipe_cmov_mem );
13557 13573 %}
13558 13574
13559 13575 // Compare 2 longs and CMOVE ints.
13560 13576 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13561 13577 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13562 13578 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13563 13579 ins_cost(200);
13564 13580 format %{ "CMOV$cmp $dst,$src" %}
13565 13581 opcode(0x0F,0x40);
13566 13582 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13567 13583 ins_pipe( pipe_cmov_reg );
13568 13584 %}
13569 13585
13570 13586 // Compare 2 longs and CMOVE doubles
13571 13587 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13572 13588 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13573 13589 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13574 13590 ins_cost(200);
13575 13591 expand %{
13576 13592 fcmovD_regS(cmp,flags,dst,src);
13577 13593 %}
13578 13594 %}
13579 13595
13580 13596 // Compare 2 longs and CMOVE doubles
13581 13597 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
13582 13598 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13583 13599 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13584 13600 ins_cost(200);
13585 13601 expand %{
13586 13602 fcmovXD_regS(cmp,flags,dst,src);
13587 13603 %}
13588 13604 %}
13589 13605
13590 13606 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13591 13607 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13592 13608 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13593 13609 ins_cost(200);
13594 13610 expand %{
13595 13611 fcmovF_regS(cmp,flags,dst,src);
13596 13612 %}
13597 13613 %}
13598 13614
// Compare 2 longs and CMOVE floats (SSE register form).
13599 13615 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=1 guard applies to
// both the 'eq' and 'ne' arms ('&&' binds tighter than '||' in C++).
13600 13616 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13601 13617 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13602 13618 ins_cost(200);
13603 13619 expand %{
13604 13620 fcmovX_regS(cmp,flags,dst,src);
13605 13621 %}
13606 13622 %}
13607 13623
13608 13624 //======
13609 13625 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13610 13626 // Same as cmpL_reg_flags_LEGT except must negate src
// The XOR/CMP/SBB sequence computes 0 - src across both 32-bit halves; only the
// resulting flags are consumed (tmp is a scratch register, see TEMP effect).
13611 13627 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13612 13628 match( Set flags (CmpL src zero ));
13613 13629 effect( TEMP tmp );
13614 13630 ins_cost(300);
13615 13631 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13616 13632 "CMP $tmp,$src.lo\n\t"
13617 13633 "SBB $tmp,$src.hi\n\t" %}
13618 13634 ins_encode( long_cmp_flags3(src, tmp) );
13619 13635 ins_pipe( ialu_reg_reg_long );
13620 13636 %}
13621 13637
13622 13638 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13623 13639 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13624 13640 // requires a commuted test to get the same result.
// tmp receives src2.hi - src1.hi - borrow; only the flags it produces matter.
13625 13641 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13626 13642 match( Set flags (CmpL src1 src2 ));
13627 13643 effect( TEMP tmp );
13628 13644 ins_cost(300);
13629 13645 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13630 13646 "MOV $tmp,$src2.hi\n\t"
13631 13647 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13632 13648 ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13633 13649 ins_pipe( ialu_cr_reg_reg );
13634 13650 %}
13635 13651
13636 13652 // Long compares reg < zero/reg OR reg >= zero/reg.
13637 13653 // Just a wrapper for a normal branch, plus the predicate test
// Only matches If nodes whose Bool test is GT or LE; expands to a plain
// conditional jump on the flags produced by the LEGT compare rules above.
13638 13654 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13639 13655 match(If cmp flags);
13640 13656 effect(USE labl);
13641 13657 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13642 13658 ins_cost(300);
13643 13659 expand %{
13644 13660 jmpCon(cmp,flags,labl); // JGT or JLE...
13645 13661 %}
13646 13662 %}
13647 13663
13648 13664 // Compare 2 longs and CMOVE longs.
// The 64-bit move is done as two 32-bit CMOVcc instructions (lo then hi half);
// requires hardware CMOV support and an LE/GT test.
13649 13665 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13650 13666 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13651 13667 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13652 13668 ins_cost(400);
13653 13669 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13654 13670 "CMOV$cmp $dst.hi,$src.hi" %}
13655 13671 opcode(0x0F,0x40);
13656 13672 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13657 13673 ins_pipe( pipe_cmov_reg_long );
13658 13674 %}
13659 13675
// Compare 2 longs and CMOVE a long loaded from memory (lo half, then hi half
// at offset +4); requires hardware CMOV support and an LE/GT test.
13660 13676 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13661 13677 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13662 13678 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13663 13679 ins_cost(500);
13664 13680 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13665 13681 "CMOV$cmp $dst.hi,$src.hi+4" %}
13666 13682 opcode(0x0F,0x40);
13667 13683 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13668 13684 ins_pipe( pipe_cmov_reg_long );
13669 13685 %}
13670 13686
13671 13687 // Compare 2 longs and CMOVE ints.
// Single 32-bit CMOVcc; requires hardware CMOV support and an LE/GT test.
13672 13688 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13673 13689 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13674 13690 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13675 13691 ins_cost(200);
13676 13692 format %{ "CMOV$cmp $dst,$src" %}
13677 13693 opcode(0x0F,0x40);
13678 13694 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13679 13695 ins_pipe( pipe_cmov_reg );
13680 13696 %}
13681 13697
// Compare 2 longs and CMOVE an int loaded from memory; requires hardware CMOV
// support and an LE/GT test.
13682 13698 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13683 13699 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13684 13700 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13685 13701 ins_cost(250);
13686 13702 format %{ "CMOV$cmp $dst,$src" %}
13687 13703 opcode(0x0F,0x40);
13688 13704 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13689 13705 ins_pipe( pipe_cmov_mem );
13690 13706 %}
13691 13707
13692 13708 // Compare 2 longs and CMOVE ptrs.
// Same encoding as the int form (32-bit CMOVcc); pointers are 32 bits here.
13693 13709 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13694 13710 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13695 13711 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13696 13712 ins_cost(200);
13697 13713 format %{ "CMOV$cmp $dst,$src" %}
13698 13714 opcode(0x0F,0x40);
13699 13715 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13700 13716 ins_pipe( pipe_cmov_reg );
13701 13717 %}
13702 13718
13703 13719 // Compare 2 longs and CMOVE doubles
13704 13720 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||' in
// C++, so without parens the UseSSE<=1 guard covered only the 'le' arm and the
// rule could match a 'gt' test at any SSE level (same style as cmovPP_reg_LEGT).
13705 13721 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13706 13722 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13707 13723 ins_cost(200);
13708 13724 expand %{
13709 13725 fcmovD_regS(cmp,flags,dst,src);
13710 13726 %}
13711 13727 %}
13712 13728
13713 13729 // Compare 2 longs and CMOVE doubles
13714 13730 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=2 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13715 13731 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13716 13732 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13717 13733 ins_cost(200);
13718 13734 expand %{
13719 13735 fcmovXD_regS(cmp,flags,dst,src);
13720 13736 %}
13721 13737 %}
13722 13738
// Compare 2 longs and CMOVE floats (x87 form, no SSE).
13723 13739 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE==0 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13724 13740 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13725 13741 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13726 13742 ins_cost(200);
13727 13743 expand %{
13728 13744 fcmovF_regS(cmp,flags,dst,src);
13729 13745 %}
13730 13746 %}
13731 13747
13732 13748
// Compare 2 longs and CMOVE floats (SSE register form).
13733 13749 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=1 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13734 13750 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13735 13751 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13736 13752 ins_cost(200);
13737 13753 expand %{
13738 13754 fcmovX_regS(cmp,flags,dst,src);
13739 13755 %}
13740 13756 %}
13741 13757
13742 13758
13743 13759 // ============================================================================
13744 13760 // Procedure Call/Return Instructions
13745 13761 // Call Java Static Instruction
13746 13762 // Note: If this code changes, the corresponding ret_addr_offset() and
13747 13763 // compute_padding() functions will have to be adjusted.
// Plain static call; the method-handle variant is matched separately by
// CallStaticJavaHandle (see the is_method_handle_invoke predicate).
// NOTE(review): pre_call_FPU/post_call_FPU presumably manage x87 state around
// the call — their encodings are defined elsewhere in this file; confirm there.
13748 13764 instruct CallStaticJavaDirect(method meth) %{
13749 13765 match(CallStaticJava);
13750 13766 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13751 13767 effect(USE meth);
13752 13768
13753 13769 ins_cost(300);
13754 13770 format %{ "CALL,static " %}
13755 13771 opcode(0xE8); /* E8 cd */
13756 13772 ins_encode( pre_call_FPU,
13757 13773 Java_Static_Call( meth ),
13758 13774 call_epilog,
13759 13775 post_call_FPU );
13760 13776 ins_pipe( pipe_slow );
13761 13777 ins_pc_relative(1);
13762 13778 ins_alignment(4);
13763 13779 %}
13764 13780
13765 13781 // Call Java Static Instruction (method handle version)
13766 13782 // Note: If this code changes, the corresponding ret_addr_offset() and
13767 13783 // compute_padding() functions will have to be adjusted.
// Differs from CallStaticJavaDirect by the preserve_SP/restore_SP pair in the
// encoding and by matching only method-handle invokes.
13768 13784 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13769 13785 match(CallStaticJava);
13770 13786 predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13771 13787 effect(USE meth);
13772 13788 // EBP is saved by all callees (for interpreter stack correction).
13773 13789 // We use it here for a similar purpose, in {preserve,restore}_SP.
13774 13790
13775 13791 ins_cost(300);
13776 13792 format %{ "CALL,static/MethodHandle " %}
13777 13793 opcode(0xE8); /* E8 cd */
13778 13794 ins_encode( pre_call_FPU,
13779 13795 preserve_SP,
13780 13796 Java_Static_Call( meth ),
13781 13797 restore_SP,
13782 13798 call_epilog,
13783 13799 post_call_FPU );
13784 13800 ins_pipe( pipe_slow );
13785 13801 ins_pc_relative(1);
13786 13802 ins_alignment(4);
13787 13803 %}
13788 13804
13789 13805 // Call Java Dynamic Instruction
13790 13806 // Note: If this code changes, the corresponding ret_addr_offset() and
13791 13807 // compute_padding() functions will have to be adjusted.
// Per the format string, EAX is loaded with a placeholder oop (-1) before the
// call; the Java_Dynamic_Call encoding emits the actual call sequence.
13792 13808 instruct CallDynamicJavaDirect(method meth) %{
13793 13809 match(CallDynamicJava);
13794 13810 effect(USE meth);
13795 13811
13796 13812 ins_cost(300);
13797 13813 format %{ "MOV EAX,(oop)-1\n\t"
13798 13814 "CALL,dynamic" %}
13799 13815 opcode(0xE8); /* E8 cd */
13800 13816 ins_encode( pre_call_FPU,
13801 13817 Java_Dynamic_Call( meth ),
13802 13818 call_epilog,
13803 13819 post_call_FPU );
13804 13820 ins_pipe( pipe_slow );
13805 13821 ins_pc_relative(1);
13806 13822 ins_alignment(4);
13807 13823 %}
13808 13824
13809 13825 // Call Runtime Instruction
// Unlike the leaf variants below, this call may reach a safepoint; the x87
// stack is cleared (FFree_Float_Stack_All) before entering the runtime.
13810 13826 instruct CallRuntimeDirect(method meth) %{
13811 13827 match(CallRuntime );
13812 13828 effect(USE meth);
13813 13829
13814 13830 ins_cost(300);
13815 13831 format %{ "CALL,runtime " %}
13816 13832 opcode(0xE8); /* E8 cd */
13817 13833 // Use FFREEs to clear entries in float stack
13818 13834 ins_encode( pre_call_FPU,
13819 13835 FFree_Float_Stack_All,
13820 13836 Java_To_Runtime( meth ),
13821 13837 post_call_FPU );
13822 13838 ins_pipe( pipe_slow );
13823 13839 ins_pc_relative(1);
13824 13840 %}
13825 13841
13826 13842 // Call runtime without safepoint
// Same encoding as CallRuntimeDirect plus Verify_FPU_For_Leaf — presumably a
// debug check that the leaf call left the FPU state intact; confirm at its
// definition elsewhere in this file.
13827 13843 instruct CallLeafDirect(method meth) %{
13828 13844 match(CallLeaf);
13829 13845 effect(USE meth);
13830 13846
13831 13847 ins_cost(300);
13832 13848 format %{ "CALL_LEAF,runtime " %}
13833 13849 opcode(0xE8); /* E8 cd */
13834 13850 ins_encode( pre_call_FPU,
13835 13851 FFree_Float_Stack_All,
13836 13852 Java_To_Runtime( meth ),
13837 13853 Verify_FPU_For_Leaf, post_call_FPU );
13838 13854 ins_pipe( pipe_slow );
13839 13855 ins_pc_relative(1);
13840 13856 %}
13841 13857
// Leaf runtime call that does not touch FP state: no pre/post_call_FPU and no
// float-stack cleanup, just the direct call encoding.
13842 13858 instruct CallLeafNoFPDirect(method meth) %{
13843 13859 match(CallLeafNoFP);
13844 13860 effect(USE meth);
13845 13861
13846 13862 ins_cost(300);
13847 13863 format %{ "CALL_LEAF_NOFP,runtime " %}
13848 13864 opcode(0xE8); /* E8 cd */
13849 13865 ins_encode(Java_To_Runtime(meth));
13850 13866 ins_pipe( pipe_slow );
13851 13867 ins_pc_relative(1);
13852 13868 %}
13853 13869
13854 13870
13855 13871 // Return Instruction
13856 13872 // Remove the return address & jump to it.
// Single-byte RET (0xC3).
13857 13873 instruct Ret() %{
13858 13874 match(Return);
13859 13875 format %{ "RET" %}
13860 13876 opcode(0xC3);
13861 13877 ins_encode(OpcP);
13862 13878 ins_pipe( pipe_jmp );
13863 13879 %}
13864 13880
13865 13881 // Tail Call; Jump from runtime stub to Java code.
13866 13882 // Also known as an 'interprocedural jump'.
13867 13883 // Target of jump will eventually return to caller.
13868 13884 // TailJump below removes the return address.
// Indirect JMP (FF /4) through jump_target; EBX carries the method oop per the
// format string. The caller's return address stays on the stack.
13869 13885 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13870 13886 match(TailCall jump_target method_oop );
13871 13887 ins_cost(300);
13872 13888 format %{ "JMP $jump_target \t# EBX holds method oop" %}
13873 13889 opcode(0xFF, 0x4); /* Opcode FF /4 */
13874 13890 ins_encode( OpcP, RegOpc(jump_target) );
13875 13891 ins_pipe( pipe_jmp );
13876 13892 %}
13877 13893
13878 13894
13879 13895 // Tail Jump; remove the return address; jump to target.
13880 13896 // TailCall above leaves the return address around.
// POPs the return address into EDX (discarded) before the indirect JMP, so the
// target sees the caller's caller as its return point. EAX holds the exception oop.
13881 13897 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13882 13898 match( TailJump jump_target ex_oop );
13883 13899 ins_cost(300);
13884 13900 format %{ "POP EDX\t# pop return address into dummy\n\t"
13885 13901 "JMP $jump_target " %}
13886 13902 opcode(0xFF, 0x4); /* Opcode FF /4 */
13887 13903 ins_encode( enc_pop_rdx,
13888 13904 OpcP, RegOpc(jump_target) );
13889 13905 ins_pipe( pipe_jmp );
13890 13906 %}
13891 13907
13892 13908 // Create exception oop: created by stack-crawling runtime code.
13893 13909 // Created exception is now available to this handler, and is setup
13894 13910 // just prior to jumping to this handler. No code emitted.
// Zero-size marker instruction: it only tells the register allocator the
// exception oop is already in EAX.
13895 13911 instruct CreateException( eAXRegP ex_oop )
13896 13912 %{
13897 13913 match(Set ex_oop (CreateEx));
13898 13914
13899 13915 size(0);
13900 13916 // use the following format syntax
13901 13917 format %{ "# exception oop is in EAX; no code emitted" %}
13902 13918 ins_encode();
13903 13919 ins_pipe( empty );
13904 13920 %}
13905 13921
13906 13922
13907 13923 // Rethrow exception:
13908 13924 // The exception oop will come in the first argument position.
13909 13925 // Then JUMP (not call) to the rethrow stub code.
// Entire encoding is delegated to enc_rethrow (defined elsewhere in this file).
13910 13926 instruct RethrowException()
13911 13927 %{
13912 13928 match(Rethrow);
13913 13929
13914 13930 // use the following format syntax
13915 13931 format %{ "JMP rethrow_stub" %}
13916 13932 ins_encode(enc_rethrow);
13917 13933 ins_pipe( pipe_jmp );
13918 13934 %}
13919 13935
13920 13936 // inlined locking and unlocking
13921 13937
13922 13938
// Inlined object lock: sets the flags register from the FastLock attempt.
// tmp (EAX) and scr are scratch registers clobbered by the Fast_Lock encoding.
13923 13939 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13924 13940 match( Set cr (FastLock object box) );
13925 13941 effect( TEMP tmp, TEMP scr );
13926 13942 ins_cost(300);
13927 13943 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13928 13944 ins_encode( Fast_Lock(object,box,tmp,scr) );
13929 13945 ins_pipe( pipe_slow );
13930 13946 ins_pc_relative(1);
13931 13947 %}
13932 13948
// Inlined object unlock: sets the flags register from the FastUnlock attempt.
// box is pinned to EAX; tmp is a scratch register clobbered by Fast_Unlock.
13933 13949 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13934 13950 match( Set cr (FastUnlock object box) );
13935 13951 effect( TEMP tmp );
13936 13952 ins_cost(300);
13937 13953 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13938 13954 ins_encode( Fast_Unlock(object,box,tmp) );
13939 13955 ins_pipe( pipe_slow );
13940 13956 ins_pc_relative(1);
13941 13957 %}
13942 13958
13943 13959
13944 13960
13945 13961 // ============================================================================
13946 13962 // Safepoint Instruction
// Per the format string, a TSTL against the polling page address is emitted;
// kills the flags. NOTE(review): reaching a safepoint presumably relies on the
// poll access trapping when the page is protected — confirm in Safepoint_Poll.
13947 13963 instruct safePoint_poll(eFlagsReg cr) %{
13948 13964 match(SafePoint);
13949 13965 effect(KILL cr);
13950 13966
13951 13967 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13952 13968 // On SPARC that might be acceptable as we can generate the address with
13953 13969 // just a sethi, saving an or. By polling at offset 0 we can end up
13954 13970 // putting additional pressure on the index-0 in the D$. Because of
13955 13971 // alignment (just like the situation at hand) the lower indices tend
13956 13972 // to see more traffic. It'd be better to change the polling address
13957 13973 // to offset 0 of the last $line in the polling page.
13958 13974
13959 13975 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13960 13976 ins_cost(125);
13961 13977 size(6) ;
13962 13978 ins_encode( Safepoint_Poll() );
13963 13979 ins_pipe( ialu_reg_mem );
13964 13980 %}
13965 13981
13966 13982 //----------PEEPHOLE RULES-----------------------------------------------------
13967 13983 // These must follow all instruction definitions as they use the names
13968 13984 // defined in the instructions definitions.
13969 13985 //
13970 13986 // peepmatch ( root_instr_name [preceding_instruction]* );
13971 13987 //
13972 13988 // peepconstraint %{
13973 13989 // (instruction_number.operand_name relational_op instruction_number.operand_name
13974 13990 // [, ...] );
13975 13991 // // instruction numbers are zero-based using left to right order in peepmatch
13976 13992 //
13977 13993 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13978 13994 // // provide an instruction_number.operand_name for each operand that appears
13979 13995 // // in the replacement instruction's match rule
13980 13996 //
13981 13997 // ---------VM FLAGS---------------------------------------------------------
13982 13998 //
13983 13999 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13984 14000 //
13985 14001 // Each peephole rule is given an identifying number starting with zero and
13986 14002 // increasing by one in the order seen by the parser. An individual peephole
13987 14003 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13988 14004 // on the command-line.
13989 14005 //
13990 14006 // ---------CURRENT LIMITATIONS----------------------------------------------
13991 14007 //
13992 14008 // Only match adjacent instructions in same basic block
13993 14009 // Only equality constraints
13994 14010 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13995 14011 // Only one replacement instruction
13996 14012 //
13997 14013 // ---------EXAMPLE----------------------------------------------------------
13998 14014 //
13999 14015 // // pertinent parts of existing instructions in architecture description
14000 14016 // instruct movI(eRegI dst, eRegI src) %{
14001 14017 // match(Set dst (CopyI src));
14002 14018 // %}
14003 14019 //
14004 14020 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14005 14021 // match(Set dst (AddI dst src));
14006 14022 // effect(KILL cr);
14007 14023 // %}
14008 14024 //
14009 14025 // // Change (inc mov) to lea
14010 14026 // peephole %{
14011 14027 // // increment preceded by register-register move
14012 14028 // peepmatch ( incI_eReg movI );
14013 14029 // // require that the destination register of the increment
14014 14030 // // match the destination register of the move
14015 14031 // peepconstraint ( 0.dst == 1.dst );
14016 14032 // // construct a replacement instruction that sets
14017 14033 // // the destination to ( move's source register + one )
14018 14034 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14019 14035 // %}
14020 14036 //
14021 14037 // Implementation no longer uses movX instructions since
14022 14038 // machine-independent system no longer uses CopyX nodes.
14023 14039 //
14024 14040 // peephole %{
14025 14041 // peepmatch ( incI_eReg movI );
14026 14042 // peepconstraint ( 0.dst == 1.dst );
14027 14043 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14028 14044 // %}
14029 14045 //
14030 14046 // peephole %{
14031 14047 // peepmatch ( decI_eReg movI );
14032 14048 // peepconstraint ( 0.dst == 1.dst );
14033 14049 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14034 14050 // %}
14035 14051 //
14036 14052 // peephole %{
14037 14053 // peepmatch ( addI_eReg_imm movI );
14038 14054 // peepconstraint ( 0.dst == 1.dst );
14039 14055 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14040 14056 // %}
14041 14057 //
14042 14058 // peephole %{
14043 14059 // peepmatch ( addP_eReg_imm movP );
14044 14060 // peepconstraint ( 0.dst == 1.dst );
14045 14061 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14046 14062 // %}
14047 14063
14048 14064 // // Change load of spilled value to only a spill
14049 14065 // instruct storeI(memory mem, eRegI src) %{
14050 14066 // match(Set mem (StoreI mem src));
14051 14067 // %}
14052 14068 //
14053 14069 // instruct loadI(eRegI dst, memory mem) %{
14054 14070 // match(Set dst (LoadI mem));
14055 14071 // %}
14056 14072 //
// Peephole: a loadI immediately following a storeI of the same value to the
// same address is redundant — replace the pair with just the store.
14057 14073 peephole %{
14058 14074 peepmatch ( loadI storeI );
14059 14075 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14060 14076 peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14061 14077 %}
14062 14078
14063 14079 //----------SMARTSPILL RULES---------------------------------------------------
14064 14080 // These must follow all instruction definitions as they use the names
14065 14081 // defined in the instructions definitions.
↓ open down ↓ |
1104 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX