Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 // or visit www.oracle.com if you need additional information or have any
21 21 // questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // architecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok, so here's the trick: FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp.
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instruction out of Intel's FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source_hpp %{
239 239 // Must be visible to the DFA in dfa_x86_32.cpp
240 240 extern bool is_operand_hi32_zero(Node* n);
241 241 %}
242 242
243 243 source %{
244 244 #define RELOC_IMM32 Assembler::imm_operand
245 245 #define RELOC_DISP32 Assembler::disp32_operand
246 246
247 247 #define __ _masm.
248 248
249 249 // How to find the high register of a Long pair, given the low register
250 250 #define HIGH_FROM_LOW(x) ((x)+2)
251 251
252 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 254 // fast versions of NegF/NegD and AbsF/AbsD.
255 255
256 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
257 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 259 // of 128-bits operands for SSE instructions.
260 260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 261 // Store the value to a 128-bits operand.
262 262 operand[0] = lo;
263 263 operand[1] = hi;
264 264 return operand;
265 265 }
266 266
267 267 // Buffer for 128-bits masks used by SSE instructions.
268 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269 269
270 270 // Static initialization during VM startup.
271 271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275 275
276 276 // Offset hacking within calls.
277 277 static int pre_call_FPU_size() {
278 278 if (Compile::current()->in_24_bit_fp_mode())
279 279 return 6; // fldcw
280 280 return 0;
281 281 }
282 282
283 283 static int preserve_SP_size() {
284 284 return 2; // op, rm(reg/reg)
285 285 }
286 286
287 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 288 // from the start of the call to the point where the return address
289 289 // will point.
290 290 int MachCallStaticJavaNode::ret_addr_offset() {
291 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 292 if (_method_handle_invoke)
293 293 offset += preserve_SP_size();
294 294 return offset;
295 295 }
296 296
297 297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 299 }
300 300
301 301 static int sizeof_FFree_Float_Stack_All = -1;
302 302
303 303 int MachCallRuntimeNode::ret_addr_offset() {
304 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306 306 }
307 307
308 308 // Indicate if the safepoint node needs the polling page as an input.
309 309 // Since x86 does have absolute addressing, it doesn't.
310 310 bool SafePointNode::needs_polling_address_input() {
311 311 return false;
312 312 }
313 313
314 314 //
315 315 // Compute padding required for nodes which need alignment
316 316 //
317 317
318 318 // The address of the call instruction needs to be 4-byte aligned to
319 319 // ensure that it does not span a cache line so that it can be patched.
320 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 322 current_offset += 1; // skip call opcode byte
323 323 return round_to(current_offset, alignment_required()) - current_offset;
324 324 }
325 325
326 326 // The address of the call instruction needs to be 4-byte aligned to
327 327 // ensure that it does not span a cache line so that it can be patched.
328 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 331 current_offset += 1; // skip call opcode byte
332 332 return round_to(current_offset, alignment_required()) - current_offset;
333 333 }
334 334
335 335 // The address of the call instruction needs to be 4-byte aligned to
336 336 // ensure that it does not span a cache line so that it can be patched.
337 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 339 current_offset += 5; // skip MOV instruction
340 340 current_offset += 1; // skip call opcode byte
341 341 return round_to(current_offset, alignment_required()) - current_offset;
342 342 }
343 343
344 344 // EMIT_RM()
345 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346 346 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347 347 cbuf.insts()->emit_int8(c);
348 348 }
349 349
350 350 // EMIT_CC()
351 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352 352 unsigned char c = (unsigned char)( f1 | f2 );
353 353 cbuf.insts()->emit_int8(c);
354 354 }
355 355
356 356 // EMIT_OPCODE()
357 357 void emit_opcode(CodeBuffer &cbuf, int code) {
358 358 cbuf.insts()->emit_int8((unsigned char) code);
359 359 }
360 360
361 361 // EMIT_OPCODE() w/ relocation information
362 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363 363 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364 364 emit_opcode(cbuf, code);
365 365 }
366 366
367 367 // EMIT_D8()
368 368 void emit_d8(CodeBuffer &cbuf, int d8) {
369 369 cbuf.insts()->emit_int8((unsigned char) d8);
370 370 }
371 371
372 372 // EMIT_D16()
373 373 void emit_d16(CodeBuffer &cbuf, int d16) {
374 374 cbuf.insts()->emit_int16(d16);
375 375 }
376 376
377 377 // EMIT_D32()
378 378 void emit_d32(CodeBuffer &cbuf, int d32) {
379 379 cbuf.insts()->emit_int32(d32);
380 380 }
381 381
382 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
383 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384 384 int format) {
385 385 cbuf.relocate(cbuf.insts_mark(), reloc, format);
386 386 cbuf.insts()->emit_int32(d32);
387 387 }
388 388
389 389 // emit 32 bit value and construct relocation entry from RelocationHolder
390 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391 391 int format) {
392 392 #ifdef ASSERT
393 393 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394 394 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
395 395 }
396 396 #endif
397 397 cbuf.relocate(cbuf.insts_mark(), rspec, format);
398 398 cbuf.insts()->emit_int32(d32);
399 399 }
400 400
401 401 // Access stack slot for load or store
402 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403 403 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
404 404 if( -128 <= disp && disp <= 127 ) {
405 405 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
406 406 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
407 407 emit_d8 (cbuf, disp); // Displacement // R/M byte
408 408 } else {
409 409 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
410 410 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
411 411 emit_d32(cbuf, disp); // Displacement // R/M byte
412 412 }
413 413 }
414 414
415 415 // eRegI ereg, memory mem) %{ // emit_reg_mem
416 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
417 417 // There is no index & no scale, use form without SIB byte
418 418 if ((index == 0x4) &&
419 419 (scale == 0) && (base != ESP_enc)) {
420 420 // If no displacement, mode is 0x0; unless base is [EBP]
421 421 if ( (displace == 0) && (base != EBP_enc) ) {
422 422 emit_rm(cbuf, 0x0, reg_encoding, base);
423 423 }
424 424 else { // If 8-bit displacement, mode 0x1
425 425 if ((displace >= -128) && (displace <= 127)
426 426 && !(displace_is_oop) ) {
427 427 emit_rm(cbuf, 0x1, reg_encoding, base);
428 428 emit_d8(cbuf, displace);
429 429 }
430 430 else { // If 32-bit displacement
431 431 if (base == -1) { // Special flag for absolute address
432 432 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433 433 // (manual lies; no SIB needed here)
434 434 if ( displace_is_oop ) {
435 435 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
436 436 } else {
437 437 emit_d32 (cbuf, displace);
438 438 }
439 439 }
440 440 else { // Normal base + offset
441 441 emit_rm(cbuf, 0x2, reg_encoding, base);
442 442 if ( displace_is_oop ) {
443 443 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
444 444 } else {
445 445 emit_d32 (cbuf, displace);
446 446 }
447 447 }
448 448 }
449 449 }
450 450 }
451 451 else { // Else, encode with the SIB byte
452 452 // If no displacement, mode is 0x0; unless base is [EBP]
453 453 if (displace == 0 && (base != EBP_enc)) { // If no displacement
454 454 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455 455 emit_rm(cbuf, scale, index, base);
456 456 }
457 457 else { // If 8-bit displacement, mode 0x1
458 458 if ((displace >= -128) && (displace <= 127)
459 459 && !(displace_is_oop) ) {
460 460 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461 461 emit_rm(cbuf, scale, index, base);
462 462 emit_d8(cbuf, displace);
463 463 }
464 464 else { // If 32-bit displacement
465 465 if (base == 0x04 ) {
466 466 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467 467 emit_rm(cbuf, scale, index, 0x04);
468 468 } else {
469 469 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470 470 emit_rm(cbuf, scale, index, base);
471 471 }
472 472 if ( displace_is_oop ) {
473 473 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
474 474 } else {
475 475 emit_d32 (cbuf, displace);
476 476 }
477 477 }
478 478 }
479 479 }
480 480 }
481 481
482 482
483 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484 484 if( dst_encoding == src_encoding ) {
485 485 // reg-reg copy, use an empty encoding
486 486 } else {
487 487 emit_opcode( cbuf, 0x8B );
488 488 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489 489 }
490 490 }
491 491
492 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
493 493 Label exit;
494 494 __ jccb(Assembler::noParity, exit);
495 495 __ pushf();
496 496 //
497 497 // comiss/ucomiss instructions set ZF,PF,CF flags and
498 498 // zero OF,AF,SF for NaN values.
499 499 // Fixup flags by zeroing ZF,PF so that compare of NaN
500 500 // values returns 'less than' result (CF is set).
501 501 // Leave the rest of flags unchanged.
502 502 //
503 503 // 7 6 5 4 3 2 1 0
504 504 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
505 505 // 0 0 1 0 1 0 1 1 (0x2B)
506 506 //
507 507 __ andl(Address(rsp, 0), 0xffffff2b);
508 508 __ popf();
509 509 __ bind(exit);
510 510 }
511 511
512 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513 513 Label done;
514 514 __ movl(dst, -1);
515 515 __ jcc(Assembler::parity, done);
516 516 __ jcc(Assembler::below, done);
517 517 __ setb(Assembler::notEqual, dst);
518 518 __ movzbl(dst, dst);
519 519 __ bind(done);
520 520 }
521 521
522 522
523 523 //=============================================================================
524 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525 525
526 526 int Compile::ConstantTable::calculate_table_base_offset() const {
527 527 return 0; // absolute addressing, no offset
528 528 }
529 529
530 530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
531 531 // Empty encoding
532 532 }
533 533
534 534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
535 535 return 0;
536 536 }
537 537
538 538 #ifndef PRODUCT
539 539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
540 540 st->print("# MachConstantBaseNode (empty encoding)");
541 541 }
542 542 #endif
543 543
544 544
545 545 //=============================================================================
546 546 #ifndef PRODUCT
547 547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
548 548 Compile* C = ra_->C;
549 549
550 550 int framesize = C->frame_slots() << LogBytesPerInt;
551 551 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
552 552 // Remove wordSize for return addr which is already pushed.
553 553 framesize -= wordSize;
554 554
555 555 if (C->need_stack_bang(framesize)) {
556 556 framesize -= wordSize;
557 557 st->print("# stack bang");
558 558 st->print("\n\t");
559 559 st->print("PUSH EBP\t# Save EBP");
560 560 if (framesize) {
561 561 st->print("\n\t");
562 562 st->print("SUB ESP, #%d\t# Create frame",framesize);
563 563 }
564 564 } else {
565 565 st->print("SUB ESP, #%d\t# Create frame",framesize);
566 566 st->print("\n\t");
567 567 framesize -= wordSize;
568 568 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
569 569 }
570 570
571 571 if (VerifyStackAtCalls) {
572 572 st->print("\n\t");
573 573 framesize -= wordSize;
574 574 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
575 575 }
576 576
577 577 if( C->in_24_bit_fp_mode() ) {
578 578 st->print("\n\t");
579 579 st->print("FLDCW \t# load 24 bit fpu control word");
580 580 }
581 581 if (UseSSE >= 2 && VerifyFPU) {
582 582 st->print("\n\t");
583 583 st->print("# verify FPU stack (must be clean on entry)");
584 584 }
585 585
586 586 #ifdef ASSERT
587 587 if (VerifyStackAtCalls) {
588 588 st->print("\n\t");
589 589 st->print("# stack alignment check");
590 590 }
591 591 #endif
592 592 st->cr();
593 593 }
594 594 #endif
595 595
596 596
597 597 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
598 598 Compile* C = ra_->C;
599 599 MacroAssembler _masm(&cbuf);
600 600
601 601 int framesize = C->frame_slots() << LogBytesPerInt;
602 602
603 603 __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode());
604 604
605 605 C->set_frame_complete(cbuf.insts_size());
606 606
607 607 if (C->has_mach_constant_base_node()) {
608 608 // NOTE: We set the table base offset here because users might be
609 609 // emitted before MachConstantBaseNode.
610 610 Compile::ConstantTable& constant_table = C->constant_table();
611 611 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
612 612 }
613 613 }
614 614
615 615 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
616 616 return MachNode::size(ra_); // too many variables; just compute it the hard way
617 617 }
618 618
619 619 int MachPrologNode::reloc() const {
620 620 return 0; // a large enough number
621 621 }
622 622
623 623 //=============================================================================
624 624 #ifndef PRODUCT
625 625 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
626 626 Compile *C = ra_->C;
627 627 int framesize = C->frame_slots() << LogBytesPerInt;
628 628 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
629 629 // Remove two words for return addr and rbp.
630 630 framesize -= 2*wordSize;
631 631
632 632 if( C->in_24_bit_fp_mode() ) {
633 633 st->print("FLDCW standard control word");
634 634 st->cr(); st->print("\t");
635 635 }
636 636 if( framesize ) {
637 637 st->print("ADD ESP,%d\t# Destroy frame",framesize);
638 638 st->cr(); st->print("\t");
639 639 }
640 640 st->print_cr("POPL EBP"); st->print("\t");
641 641 if( do_polling() && C->is_method_compilation() ) {
642 642 st->print("TEST PollPage,EAX\t! Poll Safepoint");
643 643 st->cr(); st->print("\t");
644 644 }
645 645 }
646 646 #endif
647 647
648 648 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
649 649 Compile *C = ra_->C;
650 650
651 651 // If method set FPU control word, restore to standard control word
652 652 if( C->in_24_bit_fp_mode() ) {
653 653 MacroAssembler masm(&cbuf);
654 654 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
655 655 }
656 656
657 657 int framesize = C->frame_slots() << LogBytesPerInt;
658 658 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
659 659 // Remove two words for return addr and rbp.
660 660 framesize -= 2*wordSize;
661 661
662 662 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
663 663
664 664 if( framesize >= 128 ) {
665 665 emit_opcode(cbuf, 0x81); // add SP, #framesize
666 666 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
667 667 emit_d32(cbuf, framesize);
668 668 }
669 669 else if( framesize ) {
670 670 emit_opcode(cbuf, 0x83); // add SP, #framesize
671 671 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
672 672 emit_d8(cbuf, framesize);
673 673 }
674 674
675 675 emit_opcode(cbuf, 0x58 | EBP_enc);
676 676
677 677 if( do_polling() && C->is_method_compilation() ) {
678 678 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
679 679 emit_opcode(cbuf,0x85);
680 680 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
681 681 emit_d32(cbuf, (intptr_t)os::get_polling_page());
682 682 }
683 683 }
684 684
685 685 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
686 686 Compile *C = ra_->C;
687 687 // If method set FPU control word, restore to standard control word
688 688 int size = C->in_24_bit_fp_mode() ? 6 : 0;
689 689 if( do_polling() && C->is_method_compilation() ) size += 6;
690 690
691 691 int framesize = C->frame_slots() << LogBytesPerInt;
692 692 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
693 693 // Remove two words for return addr and rbp.
694 694 framesize -= 2*wordSize;
695 695
696 696 size++; // popl rbp,
697 697
698 698 if( framesize >= 128 ) {
699 699 size += 6;
700 700 } else {
701 701 size += framesize ? 3 : 0;
702 702 }
703 703 return size;
704 704 }
705 705
706 706 int MachEpilogNode::reloc() const {
707 707 return 0; // a large enough number
708 708 }
709 709
710 710 const Pipeline * MachEpilogNode::pipeline() const {
711 711 return MachNode::pipeline_class();
712 712 }
713 713
714 714 int MachEpilogNode::safepoint_offset() const { return 0; }
715 715
716 716 //=============================================================================
717 717
718 718 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
719 719 static enum RC rc_class( OptoReg::Name reg ) {
720 720
721 721 if( !OptoReg::is_valid(reg) ) return rc_bad;
722 722 if (OptoReg::is_stack(reg)) return rc_stack;
723 723
724 724 VMReg r = OptoReg::as_VMReg(reg);
725 725 if (r->is_Register()) return rc_int;
726 726 if (r->is_FloatRegister()) {
727 727 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
728 728 return rc_float;
729 729 }
730 730 assert(r->is_XMMRegister(), "must be");
731 731 return rc_xmm;
732 732 }
733 733
// Emit (cbuf != NULL), or else print (unless do_size), one instruction that
// moves between a register and the stack slot [ESP + offset]: MOV, FLD, FST,
// PUSH or POP depending on 'opcode'.  Returns the accumulated size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // Opcode + ModRM + SIB is 3 bytes, plus 0/1/4 bytes of displacement.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
753 753
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Moves a float or double between an XMM register and [ESP + offset].
// An adjacent (reg_lo, reg_hi) pair denotes a 64-bit double; otherwise a
// 32-bit float is moved.  Returns the accumulated size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
  return size+5+offset_size;
}
793 793
794 794
// XMM-to-XMM register move.  An adjacent register pair on both sides denotes
// a 64-bit double move; otherwise a 32-bit float move.  Returns accumulated
// size; the returned size accounts for the SSE/AVX prefix differences below.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
  // Only MOVAPS SSE prefix uses 1 byte.
  int sz = 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}
831 831
// Move a 32-bit general-purpose register into an XMM register (MOVD).
// Fixed 4-byte encoding; only the low (32-bit) halves are involved.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
846 846
847 847
// Move an XMM register into a 32-bit general-purpose register (MOVD).
// Fixed 4-byte encoding; only the low (32-bit) halves are involved.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
862 862
// Integer register-to-register move: MOV r32, r/m32 (opcode 0x8B) with a
// register-direct ModRM.  Always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
875 875
// Store an x87 float register to the stack slot [ESP + offset].  If the
// source is not already the top of the FP stack it is first pushed with FLD,
// and a popping store (FSTP) is used so the FP stack depth is unchanged.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The helper below takes a register number whose encoding doubles as the
  // ModRM /digit selecting FSTP (store & pop) vs FST (store, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
905 905
906 906 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
907 907 // Get registers to move
908 908 OptoReg::Name src_second = ra_->get_reg_second(in(1));
909 909 OptoReg::Name src_first = ra_->get_reg_first(in(1));
910 910 OptoReg::Name dst_second = ra_->get_reg_second(this );
911 911 OptoReg::Name dst_first = ra_->get_reg_first(this );
912 912
913 913 enum RC src_second_rc = rc_class(src_second);
914 914 enum RC src_first_rc = rc_class(src_first);
915 915 enum RC dst_second_rc = rc_class(dst_second);
916 916 enum RC dst_first_rc = rc_class(dst_first);
917 917
918 918 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
919 919
920 920 // Generate spill code!
921 921 int size = 0;
922 922
923 923 if( src_first == dst_first && src_second == dst_second )
924 924 return size; // Self copy, no move
925 925
926 926 // --------------------------------------
927 927 // Check for mem-mem move. push/pop to move.
928 928 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
929 929 if( src_second == dst_first ) { // overlapping stack copy ranges
930 930 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
931 931 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
932 932 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
933 933 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
934 934 }
935 935 // move low bits
936 936 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
937 937 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
938 938 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
939 939 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
940 940 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
941 941 }
942 942 return size;
943 943 }
944 944
945 945 // --------------------------------------
946 946 // Check for integer reg-reg copy
947 947 if( src_first_rc == rc_int && dst_first_rc == rc_int )
948 948 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
949 949
950 950 // Check for integer store
951 951 if( src_first_rc == rc_int && dst_first_rc == rc_stack )
952 952 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
953 953
954 954 // Check for integer load
955 955 if( dst_first_rc == rc_int && src_first_rc == rc_stack )
956 956 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
957 957
958 958 // Check for integer reg-xmm reg copy
959 959 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
960 960 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
961 961 "no 64 bit integer-float reg moves" );
962 962 return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
963 963 }
964 964 // --------------------------------------
965 965 // Check for float reg-reg copy
966 966 if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
967 967 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
968 968 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
969 969 if( cbuf ) {
970 970
971 971 // Note the mucking with the register encode to compensate for the 0/1
972 972 // indexing issue mentioned in a comment in the reg_def sections
973 973 // for FPR registers many lines above here.
974 974
975 975 if( src_first != FPR1L_num ) {
976 976 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
977 977 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
978 978 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
979 979 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
980 980 } else {
981 981 emit_opcode (*cbuf, 0xDD ); // FST ST(i)
982 982 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
983 983 }
984 984 #ifndef PRODUCT
985 985 } else if( !do_size ) {
986 986 if( size != 0 ) st->print("\n\t");
987 987 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
988 988 else st->print( "FST %s", Matcher::regName[dst_first]);
989 989 #endif
990 990 }
991 991 return size + ((src_first != FPR1L_num) ? 2+2 : 2);
992 992 }
993 993
994 994 // Check for float store
995 995 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
996 996 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
997 997 }
998 998
999 999 // Check for float load
1000 1000 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1001 1001 int offset = ra_->reg2offset(src_first);
1002 1002 const char *op_str;
1003 1003 int op;
1004 1004 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1005 1005 op_str = "FLD_D";
1006 1006 op = 0xDD;
1007 1007 } else { // 32-bit load
1008 1008 op_str = "FLD_S";
1009 1009 op = 0xD9;
1010 1010 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1011 1011 }
1012 1012 if( cbuf ) {
1013 1013 emit_opcode (*cbuf, op );
1014 1014 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
1015 1015 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
1016 1016 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1017 1017 #ifndef PRODUCT
1018 1018 } else if( !do_size ) {
1019 1019 if( size != 0 ) st->print("\n\t");
1020 1020 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
1021 1021 #endif
1022 1022 }
1023 1023 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1024 1024 return size + 3+offset_size+2;
1025 1025 }
1026 1026
1027 1027 // Check for xmm reg-reg copy
1028 1028 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1029 1029 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1030 1030 (src_first+1 == src_second && dst_first+1 == dst_second),
1031 1031 "no non-adjacent float-moves" );
1032 1032 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1033 1033 }
1034 1034
1035 1035 // Check for xmm reg-integer reg copy
1036 1036 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1037 1037 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1038 1038 "no 64 bit float-integer reg moves" );
1039 1039 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1040 1040 }
1041 1041
1042 1042 // Check for xmm store
1043 1043 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1044 1044 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1045 1045 }
1046 1046
1047 1047 // Check for float xmm load
1048 1048 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1049 1049 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1050 1050 }
1051 1051
1052 1052 // Copy from float reg to xmm reg
1053 1053 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1054 1054 // copy to the top of stack from floating point reg
1055 1055 // and use LEA to preserve flags
1056 1056 if( cbuf ) {
1057 1057 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
1058 1058 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1059 1059 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1060 1060 emit_d8(*cbuf,0xF8);
1061 1061 #ifndef PRODUCT
1062 1062 } else if( !do_size ) {
1063 1063 if( size != 0 ) st->print("\n\t");
1064 1064 st->print("LEA ESP,[ESP-8]");
1065 1065 #endif
1066 1066 }
1067 1067 size += 4;
1068 1068
1069 1069 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1070 1070
1071 1071 // Copy from the temp memory to the xmm reg.
1072 1072 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1073 1073
1074 1074 if( cbuf ) {
1075 1075 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
1076 1076 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1077 1077 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1078 1078 emit_d8(*cbuf,0x08);
1079 1079 #ifndef PRODUCT
1080 1080 } else if( !do_size ) {
1081 1081 if( size != 0 ) st->print("\n\t");
1082 1082 st->print("LEA ESP,[ESP+8]");
1083 1083 #endif
1084 1084 }
1085 1085 size += 4;
1086 1086 return size;
1087 1087 }
1088 1088
1089 1089 assert( size > 0, "missed a case" );
1090 1090
1091 1091 // --------------------------------------------------------------------
1092 1092 // Check for second bits still needing moving.
1093 1093 if( src_second == dst_second )
1094 1094 return size; // Self copy; no move
1095 1095 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1096 1096
1097 1097 // Check for second word int-int move
1098 1098 if( src_second_rc == rc_int && dst_second_rc == rc_int )
1099 1099 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1100 1100
1101 1101 // Check for second word integer store
1102 1102 if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1103 1103 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1104 1104
1105 1105 // Check for second word integer load
1106 1106 if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1107 1107 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1108 1108
1109 1109
1110 1110 Unimplemented();
1111 1111 }
1112 1112
#ifndef PRODUCT
// Pretty-print the spill copy (no emission, no sizing).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif
1118 1118
// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1122 1122
// Size of the spill copy in bytes, computed without emitting anything.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1126 1126
1127 1127
1128 1128 //=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the address of the stack lock slot.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1136 1136
// Emit LEA reg,[ESP + offset] to compute the stack lock address, using the
// 8-bit displacement form when it fits.  Must match BoxLockNode::size().
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    // 32-bit displacement: 7 bytes total.
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    // 8-bit displacement: 4 bytes total.
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}
1153 1153
1154 1154 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1155 1155 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1156 1156 if( offset >= 128 ) {
1157 1157 return 7;
1158 1158 }
1159 1159 else {
1160 1160 return 4;
1161 1161 }
1162 1162 }
1163 1163
1164 1164 //=============================================================================
1165 1165
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark();  // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10;  // movl; jmp
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1200 1200
1201 1201 //=============================================================================
#ifndef PRODUCT
// Print the unverified entry point: inline cache check plus padding NOPs.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1212 1212
// Unverified Entry Point: compare the expected klass (EAX) against the
// receiver's klass loaded from ECX; jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1230 1230
1231 1231 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1232 1232 return OptoBreakpoint ? 11 : 12;
1233 1233 }
1234 1234
1235 1235
1236 1236 //=============================================================================
// Reserved size of the exception handler stub.
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1245 1245
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.
// Returns the offset of the handler within the stub section.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1262 1262
// Reserved size of the deopt handler stub: a pushl of the handler's own
// address followed by a jump to the deopt blob.
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1271 1271
1272 1272 // Emit deopt handler code.
1273 1273 int emit_deopt_handler(CodeBuffer& cbuf) {
1274 1274
1275 1275 // Note that the code buffer's insts_mark is always relative to insts.
1276 1276 // That's why we must use the macroassembler to generate a handler.
1277 1277 MacroAssembler _masm(&cbuf);
1278 1278 address base =
1279 1279 __ start_a_stub(size_exception_handler());
1280 1280 if (base == NULL) return 0; // CodeBuffer::expand failed
1281 1281 int offset = __ offset();
1282 1282 InternalAddress here(__ pc());
1283 1283 __ pushptr(here.addr());
1284 1284
1285 1285 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1286 1286 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1287 1287 __ end_a_stub();
1288 1288 return offset;
1289 1289 }
1290 1290
1291 1291
1292 1292 const bool Matcher::match_rule_supported(int opcode) {
1293 1293 if (!has_match_rule(opcode))
1294 1294 return false;
1295 1295
1296 1296 switch (opcode) {
1297 1297 case Op_PopCountI:
1298 1298 case Op_PopCountL:
1299 1299 if (!UsePopCountInstruction)
1300 1300 return false;
1301 1301 break;
1302 1302 }
1303 1303
1304 1304 return true; // Per default match rules are supported.
1305 1305 }
1306 1306
// Map an allocator register number to an FPU-stack-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes
const uint Matcher::vector_width_in_bytes(void) {
  // 8-byte vectors when SSE2 is available; no vectors otherwise.
  return UseSSE >= 2 ? 8 : 0;
}

// Vector ideal reg
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1325 1325
1326 1326 // Is this branch offset short enough that a short branch can be used?
1327 1327 //
1328 1328 // NOTE: If the platform does not provide any short branch variants, then
1329 1329 // this method should return false for offset 0.
1330 1330 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1331 1331 // The passed offset is relative to address of the branch.
1332 1332 // On 86 a branch displacement is calculated relative to address
1333 1333 // of a next instruction.
1334 1334 offset -= br_size;
1335 1335
1336 1336 // the short version of jmpConUCF2 contains multiple branches,
1337 1337 // making the reach slightly less
1338 1338 if (rule == jmpConUCF2_rule)
1339 1339 return (-126 <= offset && offset <= 125);
1340 1340 return (-128 <= offset && offset <= 127);
1341 1341 }
1342 1342
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Not expected to be called on 32-bit x86 (guarded by ShouldNotCallThis);
// presumably only meaningful with compressed oops — confirm against callers.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1386 1386
1387 1387
// Swap the memory operand of 'node' (the operand reached via input edge
// 'idx') for its *_win95_safe variant.  The safe operand variants avoid
// addressing forms that are unsuitable for implicit null checks on Win95
// (see the LOAD_LONG cases below, which already avoid EBP-based addressing
// and therefore need no transformation).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                           // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operands until the one owning input edge 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                                // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1437 1437
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1448 1448
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  // ECX/EDX carry integer arguments; XMM0/XMM1 carry FP arguments when the
  // required SSE level is enabled (SSE1 for floats, SSE2 for doubles).
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// Any register that can carry a Java argument is spillable.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1463 1463
1464 1464 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1465 1465 // Use hardware integer DIV instruction when
1466 1466 // it is faster than a code which use multiply.
1467 1467 // Only when constant divisor fits into 32 bit
1468 1468 // (min_jint is excluded to get only correct
1469 1469 // positive 32 bit values from negative).
1470 1470 return VM_Version::has_fast_idiv() &&
1471 1471 (divisor == (int)divisor && divisor != min_jint);
1472 1472 }
1473 1473
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// Never requested on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// Never requested on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in EBP across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask();
}
1499 1499
1500 1500 // Returns true if the high 32 bits of the value is known to be zero.
1501 1501 bool is_operand_hi32_zero(Node* n) {
1502 1502 int opc = n->Opcode();
1503 1503 if (opc == Op_LoadUI2L) {
1504 1504 return true;
1505 1505 }
1506 1506 if (opc == Op_AndL) {
1507 1507 Node* o2 = n->in(2);
1508 1508 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1509 1509 return true;
1510 1510 }
1511 1511 }
1512 1512 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1513 1513 return true;
1514 1514 }
1515 1515 return false;
1516 1516 }
1517 1517
1518 1518 %}
1519 1519
1520 1520 //----------ENCODING BLOCK-----------------------------------------------------
1521 1521 // This block specifies the encoding classes used by the compiler to output
1522 1522 // byte streams. Encoding classes generate functions which are called by
1523 1523 // Machine Instruction Nodes in order to generate the bit encoding of the
1524 1524 // instruction. Operands specify their base encoding interface with the
1525 1525 // interface keyword. There are currently four supported interfaces,
1526 1526 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1527 1527 // operand to generate a function which returns its register number when
1528 1528 // queried. CONST_INTER causes an operand to generate a function which
1529 1529 // returns the value of the constant when queried. MEMORY_INTER causes an
1530 1530 // operand to generate four functions which return the Base Register, the
1531 1531 // Index Register, the Scale Value, and the Offset Value of the operand when
1532 1532 // queried. COND_INTER causes an operand to generate six functions which
1533 1533 // return the encoding code (ie - encoding bits for the instruction)
1534 1534 // associated with each basic boolean condition for a conditional instruction.
1535 1535 // Instructions specify two basic values for encoding. They use the
1536 1536 // ins_encode keyword to specify their encoding class (which must be one of
1537 1537 // the class names specified in the encoding block), and they use the
1538 1538 // opcode keyword to specify, in order, their primary, secondary, and
1539 1539 // tertiary opcode. Only the opcode sections which a particular instruction
1540 1540 // needs for encoding need to be specified.
1541 1541 encode %{
1542 1542 // Build emit functions for each basic byte or larger field in the intel
1543 1543 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1544 1544 // code in the enc_class source block. Emit functions will live in the
1545 1545 // main source block for now. In future, we can generalize this by
1546 1546 // adding a syntax that specifies the sizes of fields in an order,
1547 1547 // so that the adlc can build the emit functions automagically
1548 1548
  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size prefix (16-bit operand form).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Register-to-register mod/rm byte (mod=3).
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode byte followed by a register-register mod/rm byte.
  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 -- load immediate zero into a register.
  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}
1581 1581
  // Guard for Java idiv/irem: branches around CDQ for the min_int / -1
  // special case.  The IDIV itself must be emitted by the user of this rule.
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg:  divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)      min_int
    //         rdx:  remainder (= rax, irem reg)      0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF);  // cmp rcx,0FFh (imm8 sign-extends to -1)
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte with the register
  // number folded into its low bits.
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}
1627 1627
1628 1628
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate operand itself: one byte when it fits in imm8,
  // otherwise a full 32-bit immediate (matches the sign-extend opcodes above).
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1662 1662
  // Immediate-to-register op on the LOW word of a long operand.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Immediate-to-register op on the HIGH word of a long operand;
  // note it uses the tertiary opcode field and the high register of the pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Secondary opcode with the register folded into it (e.g. BSWAP r32).
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value: BSWAP both halves, then exchange them.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // mod/rm byte with the secondary opcode as the reg/opcode field.
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}
1706 1706
  // CMOVcc: primary opcode byte, then secondary merged with the condition code.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOVcc from an x87 stack register (opcode base 0xDA00 adjusted by
  // condition code and stack slot).
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; EDI gets the result, EAX/ECX/ESI are inputs
  // (ECX is killed).  If $primary is set, EDI is zeroed on the hit path.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1741 1741
  // Clean (or, in SSE2+ mode, just verify) the x87 FPU stack before an
  // external call; tracks its own emitted size for layout consistency.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // First emission records the size; later emissions must match it.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optional FPU-state verification when returning from a runtime leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1767 1767
  // Direct CALL into the runtime; in SSE2+ mode also moves an x87 FP return
  // value into xmm0 (or discards it if unused).
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Pop the x87 result through memory into xmm0 (float).
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        // Pop the x87 result through memory into xmm0 (double).
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}


  // Restore the standard FPU control word before a call if this method
  // runs in 24-bit precision mode.
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  // Re-establish 24-bit precision mode after the call, if the method uses it.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}
1819 1819
  // Static Java call: relocation kind depends on whether a resolved method
  // is known and whether the call site is an optimized virtual.
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if ( !_method ) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  // Dynamic (virtual) Java call: loads a placeholder oop into EAX for the
  // inline cache, then CALLs the resolution/fixup routine.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  // Indirect CALL through the compiled-code entry point in the methodOop
  // held in EAX.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}
1867 1867
1868 1868 // Following encoding is no longer used, but may be restored if calling
1869 1869 // convention changes significantly.
1870 1870 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1871 1871 //
1872 1872 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1873 1873 // // int ic_reg = Matcher::inline_cache_reg();
1874 1874 // // int ic_encode = Matcher::_regEncode[ic_reg];
1875 1875 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1876 1876 // // int imo_encode = Matcher::_regEncode[imo_reg];
1877 1877 //
1878 1878 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1879 1879 // // // so we load it immediately before the call
1880 1880 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1881 1881 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1882 1882 //
1883 1883 // // xor rbp,ebp
1884 1884 // emit_opcode(cbuf, 0x33);
1885 1885 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1886 1886 //
1887 1887 // // CALL to interpreter.
1888 1888 // cbuf.set_insts_mark();
1889 1889 // $$$emit8$primary;
1890 1890 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1891 1891 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1892 1892 // %}
1893 1893
  // Shift-by-immediate: opcode, mod/rm with secondary as the opcode field,
  // then the 8-bit shift count.
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; zero is strength-reduced to XOR.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1943 1943
1944 1944
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long into an int register.
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long op, low-word half: primary opcode plus reg-reg mod/rm on low regs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long op, high-word half: secondary opcode on the high registers of each pair.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Variants without an opcode byte (caller emits it separately).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Int destination with the HIGH word of a long source.
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
1979 1979
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit a raw 32-bit constant.
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute [disp32] memory reference with a zero displacement placeholder.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix, emitted only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2018 2018
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit compare-and-exchange at [mem_ptr], LOCKed on MP systems.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 (failure) as 0 and ZF==0 (success) as 1 in res,
  // using MOVs (which leave flags intact) and a short branch.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2070 2070
  // Register-memory addressing: emit mod/rm (+sib/disp) for reg and mem operand.
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Same as RegMem but addresses the HIGH word of a long in memory (disp+4)
  // and pairs it with the high register of the destination pair.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}

  // Shift a long left/right by 1-31 bits: a double shift (0x0F $tertiary,
  // SHLD when $tertiary==0xA4) moves bits across the halves, then a plain
  // shift ($primary/$secondary) finishes the originating half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}
2103 2103
  // Arithmetic right shift of a long by 32-63: move hi into lo, shift the
  // remainder, then fill hi with the sign via a 31-bit arithmetic shift.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long by 32-63: move one half into the other, shift
  // the remainder, and clear the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2145 2145
  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // Same as above, but the displacement may be an oop (relocatable).
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // LEA-style address: base register plus constant displacement, no index.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2180 2180
  // dst = min(dst,src): compare, branch around the move when dst < src.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, branch around the move when dst > src.
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2223 2223
  // Two's-complement negate of a register.
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set dst's low byte to 1 if "less" (signed), else 0.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p = (p-q) + ((p-q)<0 ? y : 0), using SBB
  // to turn the borrow into an all-ones/all-zeros mask in tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2253 2253
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
    // Same branchless conditional-add as enc_cmpLTP, but the 'y' operand
    // comes from memory instead of a register.
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp   (tmp = borrow ? -1 : 0)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2277 2277
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // 64-bit left shift by CL.  SHLD/SHL only use the low 5 bits of the count,
    // so counts >= 32 are handled up front by moving lo into hi and clearing lo.
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small          (skip the next 4 bytes: MOV + CLR)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL  $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2301 2301
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // 64-bit logical right shift by CL.  Counts >= 32 are handled up front
    // by moving hi into lo and clearing hi (SHRD/SHR only see count mod 32).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small          (skip the next 4 bytes: MOV + CLR)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR  $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2325 2325
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // 64-bit arithmetic right shift by CL.  Counts >= 32 are handled up front:
    // hi moves into lo and hi is filled with the sign (SAR hi,31).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small          (skip the next 5 bytes: MOV + SAR imm8)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31       (sign-fill the high word)
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR  $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2350 2350
2351 2351
2352 2352 // ----------------- Encodings for floating point unit -----------------
2353 2353 // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    // Emit $primary opcode with $secondary in the reg field of the ModRM byte,
    // addressing FPU register ST(src) — generic form for FMUL/FDIV-style ops.
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}
2358 2358
  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );   // FSTP ST(0) — discard TOS
    emit_d8( cbuf, 0xD8 );
  %}
2364 2364
  // !!!!! equivalent to Pop_Reg_F
  // Store TOS into ST(dst) and pop the FPU stack.
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2370 2370
  // Duplicate ST(dst) onto the top of the FPU stack.
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
2375 2375
  // Multiply ST(dst) by the first strictfp bias constant (80-bit memory
  // operand from StubRoutines) to get strict-IEEE rounding of subnormals.
  // NOTE(review): the (int) cast of the stub address is 32-bit-port specific.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2383 2383
  // Multiply ST(dst) by the second strictfp bias constant — undoes the
  // scaling applied by strictfp_bias1 after the strict-mode operation.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2391 2391
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // $primary supplies the opcode; helper emits the [ESP+disp] addressing.
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}
2396 2396
  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // Opcode already emitted; emit only the [ESP+disp32] memory operand.
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}
2404 2404
  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    // $primary/$secondary supply the FILD opcode and ModRM reg field.
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}
2409 2409
  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}
2414 2414
  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}
2420 2420
  // Store TOS into ST(dst) and pop the FPU stack (float variant of Pop_Reg_DPR).
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2425 2425
  // Duplicate ST(dst) onto the top of the FPU stack (float variant).
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2430 2430
  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    // If src is not already on top of the FPU stack, duplicate it to TOS
    // first and use FSTP (store-and-pop) so net FPU stack depth is unchanged.
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}
2441 2441
  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    // Same scheme as Pop_Mem_Reg_FPR, but with the double-precision store
    // opcode (0xDD instead of 0xD9).
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}
2452 2452
  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    // Copy FP register src to FP register dst.  If src is not already at
    // TOS it is FLDed first and FSTP (0xD8 base) is used instead of FST
    // (0xD0 base); the -1 compensates for the stack slot shift the FLD causes.
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2464 2464
2465 2465
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // Load dst onto TOS; then, if src was not FPR1, rotate the FPU stack
    // pointer (fincstp / FXCH / fdecstp) so that src ends up where FPR1 is
    // expected by the following modulo (FPREM) sequence.
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2483 2483
  // Transfer two XMM doubles onto the x87 stack for DREM-style operations:
  // after this, FPR0 = src0 (dividend) and FPR1 = src1 (divisor), with an
  // 8-byte scratch qword left allocated on the C stack.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2492 2492
  // Float variant of Push_ModD_encoding: pushes src1 then src0 onto the x87
  // stack via a 4-byte scratch slot, which remains allocated on the C stack.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}
2501 2501
  // Move the x87 TOS double into XMM 'dst' via the scratch qword on the
  // stack, popping the FPU stack and releasing the 8-byte scratch slot.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
2508 2508
  // Move the x87 TOS float into XMM 'dst' via the scratch slot on the stack;
  // the immediate 'd8' gives the number of scratch bytes to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}
2515 2515
  // Push XMM double 'src' onto the x87 stack, leaving an 8-byte scratch
  // qword allocated on the C stack (released later, e.g. by Push_ResultD).
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2522 2522
  // Allocate an 8-byte scratch area on the C stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}
2527 2527
  // Release the 8-byte scratch area allocated by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}
2532 2532
  // Copy XMM double 'src' onto the x87 stack.  Assumes a scratch qword is
  // already present at [rsp] (e.g. from push_stack_temp_qword) — no sub here.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2538 2538
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    // After an FPREM-style operation, rotate the FPU stack (fincstp / FXCH /
    // fdecstp) so the result lands back in 'src' position; the actual
    // store/pop is done by a following Pop_Reg_* / Pop_Mem_* encoding.
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2556 2556
  enc_class fnstsw_sahf_skip_parity() %{
    // Copy the FPU status word into EFLAGS (FNSTSW AX + SAHF) and skip the
    // following 5 bytes of NaN fix-up code when the comparison was ordered
    // (parity flag clear).
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2567 2567
  enc_class emitModDPR() %{
    // FPREM computes a partial remainder and must be retried until the FPU
    // reports completion (C2 clear); the status word is copied to EFLAGS and
    // the loop repeats while the parity flag (mirroring C2) is set.
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop            (rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}
2589 2589
  enc_class fpu_flags() %{
    // Convert the FPU condition codes to integer EFLAGS for a branch.
    // Unordered results (C2 set, bit 0x0400 of the status word) are forced
    // to the less-than case by setting the carry flag before SAHF.
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2612 2612
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    // (after FUCOMI-style compares the parity flag signals unordered).
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP       // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2627 2627
2628 2628 // fnstsw_ax();
2629 2629 // sahf();
2630 2630 // movl(dst, nan_result);
2631 2631 // jcc(Assembler::parity, exit);
2632 2632 // movl(dst, less_result);
2633 2633 // jcc(Assembler::below, exit);
2634 2634 // movl(dst, equal_result);
2635 2635 // jcc(Assembler::equal, exit);
2636 2636 // movl(dst, greater_result);
2637 2637
2638 2638 // less_result = 1;
2639 2639 // greater_result = -1;
2640 2640 // equal_result = 0;
2641 2641 // nan_result = -1;
2642 2642
  enc_class CmpF_Result(eRegI dst) %{
    // Materialize a three-way FP comparison result in $dst:
    //   -1 for less-than or NaN (unordered), 0 for equal, 1 for greater-than.
    // The jump displacements (0x13 / 0x0C / 0x05) each skip the remaining
    // MOV-imm32 + Jcc pairs to a common exit.
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2671 2671
2672 2672
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // (the flags after the low-word CMP are unsigned; they do not give a
  //  correct signed 64-bit ordering — kept only for reference)
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}
2687 2687
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // Sign-extend a 32-bit int into a 64-bit long register pair:
    // copy src into both halves, then arithmetic-shift the high half by 31.
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2700 2700
  enc_class convert_long_double( eRegL src ) %{
    // Convert a long to double: push both halves, FILD the 64-bit value
    // from [ESP] onto the FPU stack, then restore ESP.
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2716 2716
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // 64-bit multiply-high pattern: one-operand IMUL leaves the 64-bit
    // product in EDX:EAX; then shift EDX right by (cnt - 32).  A count of
    // exactly 32 needs no shift, so the SAR is omitted.
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}
2729 2729
  // this version doesn't have add sp, 8
  // (the pushed qword is deliberately left on the stack for later use)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2742 2742
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // One-operand IMUL: signed 64-bit product of EAX * src into EDX:EAX.
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}
2749 2749
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // One-operand MUL: unsigned 64-bit product of EAX * src into EDX:EAX.
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2756 2756
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Full 64x64->64 multiply with result in EDX:EAX (dst).
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp      (fold the cross terms into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
2782 2782
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Multiply a long by a small (<=127) constant; result in EDX:EAX.
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp      (fold the cross term into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
2800 2800
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // 64-bit signed division: push both operands (hi then lo each) and call
    // the SharedRuntime::ldiv helper, then pop the four argument words.
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
2819 2819
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // 64-bit signed remainder: same calling sequence as long_div, but
    // targets the SharedRuntime::lrem helper.
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
2838 2838
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // Compare a long against zero for EQ/NE: OR the two halves together so
    // ZF is set iff the whole 64-bit value is zero.  Clobbers $tmp.
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2847 2847
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // Long compare for EQ/NE only: if the low words differ, skip the high
    // compare (ZF already clear); otherwise the high compare decides.
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
2859 2859
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // Full signed long compare via multi-precision subtract: CMP on the low
    // words, then SBB on the high words (through $tmp, which is clobbered).
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
2871 2871
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // Signed compare of zero against a long (flags for 0 - src), using the
    // same CMP/SBB multi-precision scheme.  Clobbers $tmp.
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
2883 2883
  // Sniff, sniff... smells like Gnu Superoptimizer
  // 64-bit negate: NEG both halves, then SBB 0 from the high half to
  // propagate the borrow generated when the low half was non-zero.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
2894 2894
2895 2895
2896 2896 // Because the transitions from emitted code to the runtime
2897 2897 // monitorenter/exit helper stubs are so slow it's critical that
2898 2898 // we inline both the stack-locking fast-path and the inflated fast path.
2899 2899 //
2900 2900 // See also: cmpFastLock and cmpFastUnlock.
2901 2901 //
2902 2902 // What follows is a specialized inline transliteration of the code
2903 2903 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2904 2904 // another option would be to emit TrySlowEnter and TrySlowExit methods
2905 2905 // at startup-time. These methods would accept arguments as
2906 2906 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2907 2907 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2908 2908 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2909 2909 // In practice, however, the # of lock sites is bounded and is usually small.
2910 2910 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2911 2911 // if the processor uses simple bimodal branch predictors keyed by EIP
2912 2912 // Since the helper routines would be called from multiple synchronization
2913 2913 // sites.
2914 2914 //
2915 2915 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2916 2916 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2917 2917 // to those specialized methods. That'd give us a mostly platform-independent
2918 2918 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
2920 2920 // to park() or unpark() threads. We'd also need a few more unsafe operators
2921 2921 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2922 2922 // (b) explicit barriers or fence operations.
2923 2923 //
2924 2924 // TODO:
2925 2925 //
2926 2926 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2927 2927 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2928 2928 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2929 2929 // the lock operators would typically be faster than reifying Self.
2930 2930 //
2931 2931 // * Ideally I'd define the primitives as:
2932 2932 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2933 2933 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2934 2934 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2935 2935 // Instead, we're stuck with a rather awkward and brittle register assignments below.
2936 2936 // Furthermore the register assignments are overconstrained, possibly resulting in
2937 2937 // sub-optimal code near the synchronization site.
2938 2938 //
2939 2939 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2940 2940 // Alternately, use a better sp-proximity test.
2941 2941 //
2942 2942 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2943 2943 // Either one is sufficient to uniquely identify a thread.
2944 2944 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2945 2945 //
2946 2946 // * Intrinsify notify() and notifyAll() for the common cases where the
2947 2947 // object is locked by the calling thread but the waitlist is empty.
2948 2948 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2949 2949 //
2950 2950 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2951 2951 // But beware of excessive branch density on AMD Opterons.
2952 2952 //
2953 2953 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2954 2954 // or failure of the fast-path. If the fast-path fails then we pass
2955 2955 // control to the slow-path, typically in C. In Fast_Lock and
2956 2956 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2957 2957 // will emit a conditional branch immediately after the node.
2958 2958 // So we have branches to branches and lots of ICC.ZF games.
2959 2959 // Instead, it might be better to have C2 pass a "FailureLabel"
2960 2960 // into Fast_Lock and Fast_Unlock. In the case of success, control
2961 2961 // will drop through the node. ICC.ZF is undefined at exit.
2962 2962 // In the case of failure, the node will branch directly to the
2963 2963 // FailureLabel
2964 2964
2965 2965
2966 2966 // obj: object to lock
2967 2967 // box: on-stack box address (displaced header location) - KILLED
2968 2968 // rax,: tmp -- KILLED
2969 2969 // scr: tmp -- KILLED
2970 2970 enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
2971 2971
2972 2972 Register objReg = as_Register($obj$$reg);
2973 2973 Register boxReg = as_Register($box$$reg);
2974 2974 Register tmpReg = as_Register($tmp$$reg);
2975 2975 Register scrReg = as_Register($scr$$reg);
2976 2976
    // Ensure the register assignments are disjoint
2978 2978 guarantee (objReg != boxReg, "") ;
2979 2979 guarantee (objReg != tmpReg, "") ;
2980 2980 guarantee (objReg != scrReg, "") ;
2981 2981 guarantee (boxReg != tmpReg, "") ;
2982 2982 guarantee (boxReg != scrReg, "") ;
2983 2983 guarantee (tmpReg == as_Register(EAX_enc), "") ;
2984 2984
2985 2985 MacroAssembler masm(&cbuf);
2986 2986
2987 2987 if (_counters != NULL) {
2988 2988 masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2989 2989 }
2990 2990 if (EmitSync & 1) {
2991 2991 // set box->dhw = unused_mark (3)
2992 2992 // Force all sync thru slow-path: slow_enter() and slow_exit()
2993 2993 masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
2994 2994 masm.cmpptr (rsp, (int32_t)0) ;
2995 2995 } else
2996 2996 if (EmitSync & 2) {
2997 2997 Label DONE_LABEL ;
2998 2998 if (UseBiasedLocking) {
2999 2999 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3000 3000 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3001 3001 }
3002 3002
3003 3003 masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
3004 3004 masm.orptr (tmpReg, 0x1);
3005 3005 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3006 3006 if (os::is_MP()) { masm.lock(); }
3007 3007 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3008 3008 masm.jcc(Assembler::equal, DONE_LABEL);
3009 3009 // Recursive locking
3010 3010 masm.subptr(tmpReg, rsp);
3011 3011 masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3012 3012 masm.movptr(Address(boxReg, 0), tmpReg);
3013 3013 masm.bind(DONE_LABEL) ;
3014 3014 } else {
3015 3015 // Possible cases that we'll encounter in fast_lock
3016 3016 // ------------------------------------------------
3017 3017 // * Inflated
3018 3018 // -- unlocked
3019 3019 // -- Locked
3020 3020 // = by self
3021 3021 // = by other
3022 3022 // * biased
3023 3023 // -- by Self
3024 3024 // -- by other
3025 3025 // * neutral
3026 3026 // * stack-locked
3027 3027 // -- by self
3028 3028 // = sp-proximity test hits
3029 3029 // = sp-proximity test generates false-negative
3030 3030 // -- by other
3031 3031 //
3032 3032
3033 3033 Label IsInflated, DONE_LABEL, PopDone ;
3034 3034
3035 3035 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3036 3036 // order to reduce the number of conditional branches in the most common cases.
3037 3037 // Beware -- there's a subtle invariant that fetch of the markword
3038 3038 // at [FETCH], below, will never observe a biased encoding (*101b).
3039 3039 // If this invariant is not held we risk exclusion (safety) failure.
3040 3040 if (UseBiasedLocking && !UseOptoBiasInlining) {
3041 3041 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3042 3042 }
3043 3043
3044 3044 masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
3045 3045 masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
3046 3046 masm.jccb (Assembler::notZero, IsInflated) ;
3047 3047
3048 3048 // Attempt stack-locking ...
3049 3049 masm.orptr (tmpReg, 0x1);
3050 3050 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3051 3051 if (os::is_MP()) { masm.lock(); }
3052 3052 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3053 3053 if (_counters != NULL) {
3054 3054 masm.cond_inc32(Assembler::equal,
3055 3055 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3056 3056 }
3057 3057 masm.jccb (Assembler::equal, DONE_LABEL);
3058 3058
3059 3059 // Recursive locking
3060 3060 masm.subptr(tmpReg, rsp);
3061 3061 masm.andptr(tmpReg, 0xFFFFF003 );
3062 3062 masm.movptr(Address(boxReg, 0), tmpReg);
3063 3063 if (_counters != NULL) {
3064 3064 masm.cond_inc32(Assembler::equal,
3065 3065 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3066 3066 }
3067 3067 masm.jmp (DONE_LABEL) ;
3068 3068
3069 3069 masm.bind (IsInflated) ;
3070 3070
3071 3071 // The object is inflated.
3072 3072 //
3073 3073 // TODO-FIXME: eliminate the ugly use of manifest constants:
3074 3074 // Use markOopDesc::monitor_value instead of "2".
3075 3075 // use markOop::unused_mark() instead of "3".
3076 3076 // The tmpReg value is an objectMonitor reference ORed with
3077 3077 // markOopDesc::monitor_value (2). We can either convert tmpReg to an
3078 3078 // objectmonitor pointer by masking off the "2" bit or we can just
3079 3079 // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3080 3080 // field offsets with "-2" to compensate for and annul the low-order tag bit.
3081 3081 //
3082 3082 // I use the latter as it avoids AGI stalls.
3083 3083 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3084 3084 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3085 3085 //
3086 3086 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
3087 3087
3088 3088 // boxReg refers to the on-stack BasicLock in the current frame.
3089 3089 // We'd like to write:
3090 3090 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
3091 3091 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
3092 3092 // additional latency as we have another ST in the store buffer that must drain.
3093 3093
3094 3094 if (EmitSync & 8192) {
3095 3095 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3096 3096 masm.get_thread (scrReg) ;
3097 3097 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3098 3098 masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
3099 3099 if (os::is_MP()) { masm.lock(); }
3100 3100 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3101 3101 } else
3102 3102 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
3103 3103 masm.movptr(scrReg, boxReg) ;
3104 3104 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3105 3105
3106 3106 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3107 3107 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3108 3108 // prefetchw [eax + Offset(_owner)-2]
3109 3109 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3110 3110 }
3111 3111
3112 3112 if ((EmitSync & 64) == 0) {
3113 3113 // Optimistic form: consider XORL tmpReg,tmpReg
3114 3114 masm.movptr(tmpReg, NULL_WORD) ;
3115 3115 } else {
3116 3116 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3117 3117 // Test-And-CAS instead of CAS
3118 3118 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3119 3119 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3120 3120 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3121 3121 }
3122 3122
3123 3123 // Appears unlocked - try to swing _owner from null to non-null.
3124 3124 // Ideally, I'd manifest "Self" with get_thread and then attempt
3125 3125 // to CAS the register containing Self into m->Owner.
3126 3126 // But we don't have enough registers, so instead we can either try to CAS
3127 3127 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
3128 3128 // we later store "Self" into m->Owner. Transiently storing a stack address
3129 3129 // (rsp or the address of the box) into m->owner is harmless.
3130 3130 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3131 3131 if (os::is_MP()) { masm.lock(); }
3132 3132 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3133 3133 masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
3134 3134 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3135 3135 masm.get_thread (scrReg) ; // beware: clobbers ICCs
3136 3136 masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
3137 3137 masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
3138 3138
3139 3139 // If the CAS fails we can either retry or pass control to the slow-path.
3140 3140 // We use the latter tactic.
3141 3141 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3142 3142 // If the CAS was successful ...
3143 3143 // Self has acquired the lock
3144 3144 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3145 3145 // Intentional fall-through into DONE_LABEL ...
3146 3146 } else {
3147 3147 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3148 3148 masm.movptr(boxReg, tmpReg) ;
3149 3149
3150 3150 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3151 3151 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3152 3152 // prefetchw [eax + Offset(_owner)-2]
3153 3153 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3154 3154 }
3155 3155
3156 3156 if ((EmitSync & 64) == 0) {
3157 3157 // Optimistic form
3158 3158 masm.xorptr (tmpReg, tmpReg) ;
3159 3159 } else {
3160 3160 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3161 3161 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3162 3162 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3163 3163 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3164 3164 }
3165 3165
3166 3166 // Appears unlocked - try to swing _owner from null to non-null.
3167 3167 // Use either "Self" (in scr) or rsp as thread identity in _owner.
3168 3168 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3169 3169 masm.get_thread (scrReg) ;
3170 3170 if (os::is_MP()) { masm.lock(); }
3171 3171 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3172 3172
3173 3173 // If the CAS fails we can either retry or pass control to the slow-path.
3174 3174 // We use the latter tactic.
3175 3175 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3176 3176 // If the CAS was successful ...
3177 3177 // Self has acquired the lock
3178 3178 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3179 3179 // Intentional fall-through into DONE_LABEL ...
3180 3180 }
3181 3181
3182 3182 // DONE_LABEL is a hot target - we'd really like to place it at the
3183 3183 // start of cache line by padding with NOPs.
3184 3184 // See the AMD and Intel software optimization manuals for the
3185 3185 // most efficient "long" NOP encodings.
3186 3186 // Unfortunately none of our alignment mechanisms suffice.
3187 3187 masm.bind(DONE_LABEL);
3188 3188
3189 3189 // Avoid branch-to-branch on AMD processors
3190 3190 // This appears to be superstition.
3191 3191 if (EmitSync & 32) masm.nop() ;
3192 3192
3193 3193
3194 3194 // At DONE_LABEL the icc ZFlag is set as follows ...
3195 3195 // Fast_Unlock uses the same protocol.
3196 3196 // ZFlag == 1 -> Success
3197 3197 // ZFlag == 0 -> Failure - force control through the slow-path
3198 3198 }
3199 3199 %}
3200 3200
3201 3201 // obj: object to unlock
3202 3202 // box: box address (displaced header location), killed. Must be EAX.
3203 3203 // rbx,: killed tmp; cannot be obj nor box.
3204 3204 //
3205 3205 // Some commentary on balanced locking:
3206 3206 //
3207 3207 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3208 3208 // Methods that don't have provably balanced locking are forced to run in the
3209 3209 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3210 3210 // The interpreter provides two properties:
3211 3211 // I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
3213 3213 // interpreter maintains an on-stack list of locks currently held by
3214 3214 // a frame.
3215 3215 // I2: If a method attempts to unlock an object that is not held by the
//     frame the interpreter throws IMSX.
3217 3217 //
3218 3218 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3219 3219 // B() doesn't have provably balanced locking so it runs in the interpreter.
3220 3220 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3221 3221 // is still locked by A().
3222 3222 //
3223 3223 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3224 3224 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3225 3225 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3226 3226 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3227 3227
// Fast-path monitor exit; pairs with Fast_Lock and uses the same protocol:
// at DONE_LABEL, ZF==1 indicates success and ZF==0 forces control through
// the slow path.
//   obj: object to unlock
//   box: on-stack BasicLock (displaced header) address, killed; must be EAX
//   tmp: killed scratch; cannot be obj nor box
enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{

  Register objReg = as_Register($obj$$reg);
  Register boxReg = as_Register($box$$reg);
  Register tmpReg = as_Register($tmp$$reg);

  // The three registers must be distinct, and box must be EAX because it is
  // the implicit comparand of the CMPXCHG instructions emitted below.
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (boxReg != tmpReg, "") ;
  guarantee (boxReg == as_Register(EAX_enc), "") ;
  MacroAssembler masm(&cbuf);

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path.
    // CMP RSP,0 can never set ZF, so the caller always takes the slow path.
    masm.cmpptr (rsp, 0) ;
  } else
  if (EmitSync & 8) {
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
       masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
    // classic stack-locking code ...
    masm.movptr(tmpReg, Address(boxReg, 0)) ;
    masm.testptr(tmpReg, tmpReg) ;             // 0 => recursive stack-lock, nothing to undo
    masm.jcc   (Assembler::zero, DONE_LABEL) ;
    if (os::is_MP()) { masm.lock(); }
    masm.cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses EAX which is box
    masm.bind(DONE_LABEL);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
       masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

    masm.cmpptr(Address(boxReg, 0), 0) ;         // Examine the displaced header
    masm.movptr(tmpReg, Address(objReg, 0)) ;    // Examine the object's markword
    masm.jccb  (Assembler::zero, DONE_LABEL) ;   // 0 indicates recursive stack-lock

    masm.testptr(tmpReg, 0x02) ;                 // Inflated?
    masm.jccb  (Assembler::zero, Stacked) ;

    masm.bind  (Inflated) ;
    // It's inflated.
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit.  That is, exit without
    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock.  Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.

    masm.get_thread (boxReg) ;
    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
      // prefetchw [ebx + Offset(_owner)-2]
      masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
    }

    // Note that we could employ various encoding schemes to reduce
    // the number of loads below (currently 4) to just 2 or 3.
    // Refer to the comments in synchronizer.cpp.
    // In practice the chain of fetches doesn't seem to impact performance, however.
    // Monitor field offsets carry "-2" to annul the markOopDesc::monitor_value
    // tag bit left in tmpReg (see the commentary in Fast_Lock above).
    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
       // Attempt to reduce branch density - AMD's branch predictor.
       masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
       masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
       masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
       masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
       masm.jccb  (Assembler::notZero, DONE_LABEL) ;
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
       masm.jmpb  (DONE_LABEL) ;
    } else {
       masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
       masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
       masm.jccb  (Assembler::notZero, DONE_LABEL) ;
       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
       masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
       masm.jccb  (Assembler::notZero, CheckSucc) ;
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
       masm.jmpb  (DONE_LABEL) ;
    }

    // The Following code fragment (EmitSync & 65536) improves the performance of
    // contended applications and contended synchronization microbenchmarks.
    // Unfortunately the emission of the code - even though not executed - causes regressions
    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
    // with an equal number of never-executed NOPs results in the same regression.
    // We leave it off by default.

    if ((EmitSync & 65536) != 0) {
       Label LSuccess, LGoSlowPath ;

       masm.bind  (CheckSucc) ;

       // Optional pre-test ... it's safe to elide this
       if ((EmitSync & 16) == 0) {
          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
          masm.jccb  (Assembler::zero, LGoSlowPath) ;
       }

       // We have a classic Dekker-style idiom:
       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
       // There are a number of ways to implement the barrier:
       // (1) lock:andl &m->_owner, 0
       //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
       // (2) If supported, an explicit MFENCE is appealing.
       //     In older IA32 processors MFENCE is slower than lock:add or xchg
       //     particularly if the write-buffer is full as might be the case if
       //     if stores closely precede the fence or fence-equivalent instruction.
       //     In more modern implementations MFENCE appears faster, however.
       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
       //     The $lines underlying the top-of-stack should be in M-state.
       //     The locked add instruction is serializing, of course.
       // (4) Use xchg, which is serializing
       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
       //     The integer condition codes will tell us if succ was 0.
       //     Since _succ and _owner should reside in the same $line and
       //     we just stored into _owner, it's likely that the $line
       //     remains in M-state for the lock:orl.
       //
       // We currently use (3), although it's likely that switching to (2)
       // is correct for the future.

       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
       if (os::is_MP()) {
          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
            masm.mfence();
          } else {
            masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
          }
       }
       // Ratify _succ remains non-null
       masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
       masm.jccb  (Assembler::notZero, LSuccess) ;

       masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
       if (os::is_MP()) { masm.lock(); }
       masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
       masm.jccb  (Assembler::notEqual, LSuccess) ;
       // Since we're low on registers we installed rsp as a placeholder in _owner.
       // Now install Self over rsp.  This is safe as we're transitioning from
       // non-null to non-null
       masm.get_thread (boxReg) ;
       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
       // Intentional fall-through into LGoSlowPath ...

       masm.bind  (LGoSlowPath) ;
       masm.orptr(boxReg, 1) ;                        // set ICC.ZF=0 to indicate failure
       masm.jmpb  (DONE_LABEL) ;

       masm.bind  (LSuccess) ;
       masm.xorptr(boxReg, boxReg) ;                  // set ICC.ZF=1 to indicate success
       masm.jmpb  (DONE_LABEL) ;
    }

    masm.bind (Stacked) ;
    // It's not inflated and it's not recursively stack-locked and it's not biased.
    // It must be stack-locked.
    // Try to reset the header to displaced header.
    // The "box" value on the stack is stable, so we can reload
    // and be assured we observe the same value as above.
    masm.movptr(tmpReg, Address(boxReg, 0)) ;
    if (os::is_MP()) { masm.lock(); }
    masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
    // Intentional fall-through into DONE_LABEL


    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    if ((EmitSync & 65536) == 0) {
       masm.bind (CheckSucc) ;
    }
    masm.bind(DONE_LABEL);

    // Avoid branch to branch on AMD processors
    if (EmitSync & 32768) { masm.nop() ; }
  }
%}
3429 3429
3430 3430
// Pop the 32-bit top-of-stack into EDX (single-byte encoding 5A = POP EDX).
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}
3434 3434
// Emit a 32-bit pc-relative JMP to the rethrow stub.  The instruction mark
// and runtime-call relocation allow the target to be tracked/patched if the
// stub moves; the "-4" accounts for the displacement being relative to the
// end of the 4-byte immediate.
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);        // jmp    entry
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}
3441 3441
3442 3442
3443 3443 // Convert a double to an int. Java semantics require we do complex
3444 3444 // manglelations in the corner cases. So we set the rounding mode to
3445 3445 // 'zero', store the darned double down as an int, and reset the
3446 3446 // rounding mode to 'nearest'. The hardware throws an exception which
3447 3447 // patches up the correct value directly to the stack.
// Convert the double in FPR0 to an int in EAX.  Normal values convert at
// full speed; the x87 "integer indefinite" result (0x80000000) diverts to
// the d2i_wrapper runtime slow path to get Java corner-case semantics.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // throw an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);       // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);       // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);       // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); // 0x80000000 == x87 integer-indefinite => corner case
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call (FLD + CALL below = 7 bytes)
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}
3490 3490
// Convert the double in FPR0 to a long in EDX:EAX.  Same scheme as
// DPR2I_encoding above: truncating FISTP, then a check for the x87
// "integer indefinite" pattern (0x8000000000000000) which diverts to the
// d2l_wrapper runtime slow path for Java corner-case semantics.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);       // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);       // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);       // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted long (low word in EAX, high in EDX); adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); // 0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST/JNE below
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}
3532 3532
// Multiply ST(0) by ST(i); result replaces ST(0), nothing popped.
enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}
3539 3539
// Add ST(i) into ST(0); result replaces ST(0), nothing popped.
enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */  (D8 C0+i is FADD, not FADDP: no pop)
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  // could use FADDP  src2,fpST  /* DE C0+i */
%}
3546 3546
// Add ST(0) into ST(i) and pop the FPU stack (FADDP).
enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}
3552 3552
// Fused subtract-then-divide on the x87 stack top:
// ST(0) = (ST(0) - ST(src1)) / ST(src2).  Neither source is popped.
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1  /* D8 E0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV   ST,$src2  /* D8 F0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}
3563 3563
// Add-then-multiply on the x87 stack top:
// ST(0) = (ST(0) + ST(src1)) * ST(src2).  Neither source is popped.
enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL   ST,src2  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}
3574 3574
3575 3575
// Add-then-multiply, storing into the register operand and popping:
// ST(src2) = ST(src2) * (ST(0) + ST(src1)); ST(0) is popped by the FMULP.
enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}
3586 3586
// Atomically load the volatile long: a single 64-bit FILD from memory is
// indivisible, so the value goes through the FPU and is then stored to the
// destination stack slot with a 64-bit FISTP.
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);                   // FILD64 [mem]  (DF /5)
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // FISTP64 to dst slot
%}
3599 3599
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD64 from src slot
  cbuf.set_insts_mark();  // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);                   // FISTP64 [mem]  (DF /7)
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
%}
3616 3616
// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable (the VM revokes read access to force a
// trap at safepoints).  Unfortunately, it kills the condition code
// in the process.
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

enc_class Safepoint_Poll() %{
  // Relocate so the VM can identify/patch the polling address.
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);                 // TESTL
  emit_rm   (cbuf, 0x0, 0x7, 0x5);        // mod/rm: reg=EDI, r/m=disp32 absolute
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
3629 3629 %}
3630 3630
3631 3631
3632 3632 //----------FRAME--------------------------------------------------------------
3633 3633 // Definition of frame structure and management information.
3634 3634 //
3635 3635 // S T A C K L A Y O U T Allocators stack-slot number
3636 3636 // | (to get allocators register number
3637 3637 // G Owned by | | v add OptoReg::stack0())
3638 3638 // r CALLER | |
3639 3639 // o | +--------+ pad to even-align allocators stack-slot
3640 3640 // w V | pad0 | numbers; owned by CALLER
3641 3641 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
3642 3642 // h ^ | in | 5
3643 3643 // | | args | 4 Holes in incoming args owned by SELF
3644 3644 // | | | | 3
3645 3645 // | | +--------+
3646 3646 // V | | old out| Empty on Intel, window on Sparc
3647 3647 // | old |preserve| Must be even aligned.
3648 3648 // | SP-+--------+----> Matcher::_old_SP, even aligned
3649 3649 // | | in | 3 area for Intel ret address
3650 3650 // Owned by |preserve| Empty on Sparc.
3651 3651 // SELF +--------+
3652 3652 // | | pad2 | 2 pad to align old SP
3653 3653 // | +--------+ 1
3654 3654 // | | locks | 0
3655 3655 // | +--------+----> OptoReg::stack0(), even aligned
3656 3656 // | | pad1 | 11 pad to align new SP
3657 3657 // | +--------+
3658 3658 // | | | 10
3659 3659 // | | spills | 9 spills
3660 3660 // V | | 8 (pad0 slot for callee)
3661 3661 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
3662 3662 // ^ | out | 7
3663 3663 // | | args | 6 Holes in outgoing args owned by CALLEE
3664 3664 // Owned by +--------+
3665 3665 // CALLEE | new out| 6 Empty on Intel, window on Sparc
3666 3666 // | new |preserve| Must be even-aligned.
3667 3667 // | SP-+--------+----> Matcher::_new_SP, even aligned
3668 3668 // | | |
3669 3669 //
3670 3670 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
3671 3671 // known from SELF's arguments and the Java calling convention.
3672 3672 // Region 6-7 is determined per call site.
3673 3673 // Note 2: If the calling convention leaves holes in the incoming argument
3674 3674 // area, those holes are owned by SELF. Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3676 3676 // incoming area, as the Java calling convention is completely under
3677 3677 // the control of the AD file. Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3679 3679 // varargs C calling conventions.
3680 3680 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
3681 3681 // even aligned with pad0 as needed.
3682 3682 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
3683 3683 // region 6-11 is even aligned; it may be padded out more so that
3684 3684 // the region from SP to FP meets the minimum stack alignment.
3685 3685
3686 3686 frame %{
3687 3687 // What direction does stack grow in (assumed to be same for C & Java)
3688 3688 stack_direction(TOWARDS_LOW);
3689 3689
3690 3690 // These three registers define part of the calling convention
3691 3691 // between compiled code and the interpreter.
3692 3692 inline_cache_reg(EAX); // Inline Cache Register
3693 3693 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
3694 3694
3695 3695 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3696 3696 cisc_spilling_operand_name(indOffset32);
3697 3697
3698 3698 // Number of stack slots consumed by locking an object
3699 3699 sync_stack_slots(1);
3700 3700
3701 3701 // Compiled code's Frame Pointer
3702 3702 frame_pointer(ESP);
3703 3703 // Interpreter stores its frame pointer in a register which is
3704 3704 // stored to the stack by I2CAdaptors.
3705 3705 // I2CAdaptors convert from interpreted java to compiled java.
3706 3706 interpreter_frame_pointer(EBP);
3707 3707
3708 3708 // Stack alignment requirement
3709 3709 // Alignment size in bytes (128-bit -> 16 bytes)
3710 3710 stack_alignment(StackAlignmentInBytes);
3711 3711
3712 3712 // Number of stack slots between incoming argument block and the start of
3713 3713 // a new frame. The PROLOG must add this many slots to the stack. The
3714 3714 // EPILOG must remove this many slots. Intel needs one slot for
3715 3715 // return address and one for rbp, (must save rbp)
3716 3716 in_preserve_stack_slots(2+VerifyStackAtCalls);
3717 3717
3718 3718 // Number of outgoing stack slots killed above the out_preserve_stack_slots
3719 3719 // for calls to C. Supports the var-args backing area for register parms.
3720 3720 varargs_C_out_slots_killed(0);
3721 3721
3722 3722 // The after-PROLOG location of the return address. Location of
3723 3723 // return address specifies a type (REG or STACK) and a number
3724 3724 // representing the register number (i.e. - use a register name) or
3725 3725 // stack slot.
3726 3726 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3727 3727 // Otherwise, it is above the locks and verification slot and alignment word
3728 3728 return_addr(STACK - 1 +
3729 3729 round_to((Compile::current()->in_preserve_stack_slots() +
3730 3730 Compile::current()->fixed_slots()),
3731 3731 stack_alignment_in_slots()));
3732 3732
3733 3733 // Body of function which returns an integer array locating
3734 3734 // arguments either in registers or in stack slots. Passed an array
3735 3735 // of ideal registers called "sig" and a "length" count. Stack-slot
3736 3736 // offsets are based on outgoing arguments, i.e. a CALLER setting up
3737 3737 // arguments for a CALLEE. Incoming stack arguments are
3738 3738 // automatically biased by the preserve_stack_slots field above.
3739 3739 calling_convention %{
3740 3740 // No difference between ingoing/outgoing just pass false
3741 3741 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3742 3742 %}
3743 3743
3744 3744
3745 3745 // Body of function which returns an integer array locating
3746 3746 // arguments either in registers or in stack slots. Passed an array
3747 3747 // of ideal registers called "sig" and a "length" count. Stack-slot
3748 3748 // offsets are based on outgoing arguments, i.e. a CALLER setting up
3749 3749 // arguments for a CALLEE. Incoming stack arguments are
3750 3750 // automatically biased by the preserve_stack_slots field above.
3751 3751 c_calling_convention %{
3752 3752 // This is obviously always outgoing
3753 3753 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3754 3754 %}
3755 3755
3756 3756 // Location of C & interpreter return values
3757 3757 c_return_value %{
3758 3758 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3759 3759 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3760 3760 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3761 3761
3762 3762 // in SSE2+ mode we want to keep the FPU stack clean so pretend
3763 3763 // that C functions return float and double results in XMM0.
3764 3764 if( ideal_reg == Op_RegD && UseSSE>=2 )
3765 3765 return OptoRegPair(XMM0b_num,XMM0a_num);
3766 3766 if( ideal_reg == Op_RegF && UseSSE>=2 )
3767 3767 return OptoRegPair(OptoReg::Bad,XMM0a_num);
3768 3768
3769 3769 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3770 3770 %}
3771 3771
3772 3772 // Location of return values
3773 3773 return_value %{
3774 3774 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3775 3775 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3776 3776 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3777 3777 if( ideal_reg == Op_RegD && UseSSE>=2 )
3778 3778 return OptoRegPair(XMM0b_num,XMM0a_num);
3779 3779 if( ideal_reg == Op_RegF && UseSSE>=1 )
3780 3780 return OptoRegPair(OptoReg::Bad,XMM0a_num);
3781 3781 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3782 3782 %}
3783 3783
3784 3784 %}
3785 3785
3786 3786 //----------ATTRIBUTES---------------------------------------------------------
3787 3787 //----------Operand Attributes-------------------------------------------------
3788 3788 op_attrib op_cost(0); // Required cost attribute
3789 3789
3790 3790 //----------Instruction Attributes---------------------------------------------
3791 3791 ins_attrib ins_cost(100); // Required cost attribute
3792 3792 ins_attrib ins_size(8); // Required size attribute (in bits)
3793 3793 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3794 3794 // non-matching short branch variant of some
3795 3795 // long branch?
3796 3796 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
3797 3797 // specifies the alignment that some part of the instruction (not
3798 3798 // necessarily the start) requires. If > 1, a compute_padding()
3799 3799 // function must be provided for the instruction
3800 3800
3801 3801 //----------OPERANDS-----------------------------------------------------------
3802 3802 // Operand definitions must precede instruction definitions for correct parsing
3803 3803 // in the ADLC because operands constitute user defined types which are used in
3804 3804 // instruction definitions.
3805 3805
3806 3806 //----------Simple Operands----------------------------------------------------
3807 3807 // Immediate Operands
3808 3808 // Integer Immediate
3809 3809 operand immI() %{
3810 3810 match(ConI);
3811 3811
3812 3812 op_cost(10);
3813 3813 format %{ %}
3814 3814 interface(CONST_INTER);
3815 3815 %}
3816 3816
3817 3817 // Constant for test vs zero
3818 3818 operand immI0() %{
3819 3819 predicate(n->get_int() == 0);
3820 3820 match(ConI);
3821 3821
3822 3822 op_cost(0);
3823 3823 format %{ %}
3824 3824 interface(CONST_INTER);
3825 3825 %}
3826 3826
3827 3827 // Constant for increment
3828 3828 operand immI1() %{
3829 3829 predicate(n->get_int() == 1);
3830 3830 match(ConI);
3831 3831
3832 3832 op_cost(0);
3833 3833 format %{ %}
3834 3834 interface(CONST_INTER);
3835 3835 %}
3836 3836
3837 3837 // Constant for decrement
3838 3838 operand immI_M1() %{
3839 3839 predicate(n->get_int() == -1);
3840 3840 match(ConI);
3841 3841
3842 3842 op_cost(0);
3843 3843 format %{ %}
3844 3844 interface(CONST_INTER);
3845 3845 %}
3846 3846
3847 3847 // Valid scale values for addressing modes
3848 3848 operand immI2() %{
3849 3849 predicate(0 <= n->get_int() && (n->get_int() <= 3));
3850 3850 match(ConI);
3851 3851
3852 3852 format %{ %}
3853 3853 interface(CONST_INTER);
3854 3854 %}
3855 3855
3856 3856 operand immI8() %{
3857 3857 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3858 3858 match(ConI);
3859 3859
3860 3860 op_cost(5);
3861 3861 format %{ %}
3862 3862 interface(CONST_INTER);
3863 3863 %}
3864 3864
3865 3865 operand immI16() %{
3866 3866 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3867 3867 match(ConI);
3868 3868
3869 3869 op_cost(10);
3870 3870 format %{ %}
3871 3871 interface(CONST_INTER);
3872 3872 %}
3873 3873
3874 3874 // Constant for long shifts
3875 3875 operand immI_32() %{
3876 3876 predicate( n->get_int() == 32 );
3877 3877 match(ConI);
3878 3878
3879 3879 op_cost(0);
3880 3880 format %{ %}
3881 3881 interface(CONST_INTER);
3882 3882 %}
3883 3883
3884 3884 operand immI_1_31() %{
3885 3885 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3886 3886 match(ConI);
3887 3887
3888 3888 op_cost(0);
3889 3889 format %{ %}
3890 3890 interface(CONST_INTER);
3891 3891 %}
3892 3892
3893 3893 operand immI_32_63() %{
3894 3894 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3895 3895 match(ConI);
3896 3896 op_cost(0);
3897 3897
3898 3898 format %{ %}
3899 3899 interface(CONST_INTER);
3900 3900 %}
3901 3901
3902 3902 operand immI_1() %{
3903 3903 predicate( n->get_int() == 1 );
3904 3904 match(ConI);
3905 3905
3906 3906 op_cost(0);
3907 3907 format %{ %}
3908 3908 interface(CONST_INTER);
3909 3909 %}
3910 3910
3911 3911 operand immI_2() %{
3912 3912 predicate( n->get_int() == 2 );
3913 3913 match(ConI);
3914 3914
3915 3915 op_cost(0);
3916 3916 format %{ %}
3917 3917 interface(CONST_INTER);
3918 3918 %}
3919 3919
3920 3920 operand immI_3() %{
3921 3921 predicate( n->get_int() == 3 );
3922 3922 match(ConI);
3923 3923
3924 3924 op_cost(0);
3925 3925 format %{ %}
3926 3926 interface(CONST_INTER);
3927 3927 %}
3928 3928
3929 3929 // Pointer Immediate
3930 3930 operand immP() %{
3931 3931 match(ConP);
3932 3932
3933 3933 op_cost(10);
3934 3934 format %{ %}
3935 3935 interface(CONST_INTER);
3936 3936 %}
3937 3937
3938 3938 // NULL Pointer Immediate
3939 3939 operand immP0() %{
3940 3940 predicate( n->get_ptr() == 0 );
3941 3941 match(ConP);
3942 3942 op_cost(0);
3943 3943
3944 3944 format %{ %}
3945 3945 interface(CONST_INTER);
3946 3946 %}
3947 3947
3948 3948 // Long Immediate
3949 3949 operand immL() %{
3950 3950 match(ConL);
3951 3951
3952 3952 op_cost(20);
3953 3953 format %{ %}
3954 3954 interface(CONST_INTER);
3955 3955 %}
3956 3956
3957 3957 // Long Immediate zero
3958 3958 operand immL0() %{
3959 3959 predicate( n->get_long() == 0L );
3960 3960 match(ConL);
3961 3961 op_cost(0);
3962 3962
3963 3963 format %{ %}
3964 3964 interface(CONST_INTER);
3965 3965 %}
3966 3966
3967 3967 // Long Immediate minus one (-1L)
3968 3968 operand immL_M1() %{
3969 3969 predicate( n->get_long() == -1L );
3970 3970 match(ConL);
3971 3971 op_cost(0);
3972 3972
3973 3973 format %{ %}
3974 3974 interface(CONST_INTER);
3975 3975 %}
3976 3976
3977 3977 // Long immediate from 0 to 127.
3978 3978 // Used for a shorter form of long mul by 10.
3979 3979 operand immL_127() %{
3980 3980 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3981 3981 match(ConL);
3982 3982 op_cost(0);
3983 3983
3984 3984 format %{ %}
3985 3985 interface(CONST_INTER);
3986 3986 %}
3987 3987
3988 3988 // Long Immediate: low 32-bit mask
3989 3989 operand immL_32bits() %{
3990 3990 predicate(n->get_long() == 0xFFFFFFFFL);
3991 3991 match(ConL);
3992 3992 op_cost(0);
3993 3993
3994 3994 format %{ %}
3995 3995 interface(CONST_INTER);
3996 3996 %}
3997 3997
3998 3998 // Long Immediate: value fits in a signed 32-bit int
3999 3999 operand immL32() %{
4000 4000 predicate(n->get_long() == (int)(n->get_long()));
4001 4001 match(ConL);
4002 4002 op_cost(20);
4003 4003
4004 4004 format %{ %}
4005 4005 interface(CONST_INTER);
4006 4006 %}
4007 4007
4008 4008 // Double Immediate zero
4009 4009 operand immDPR0() %{
4010 4010 // Do additional (and counter-intuitive) test against NaN to work around VC++
4011 4011 // bug that generates code such that NaNs compare equal to 0.0
4012 4012 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4013 4013 match(ConD);
4014 4014
4015 4015 op_cost(5);
4016 4016 format %{ %}
4017 4017 interface(CONST_INTER);
4018 4018 %}
4019 4019
4020 4020 // Double Immediate one
4021 4021 operand immDPR1() %{
4022 4022 predicate( UseSSE<=1 && n->getd() == 1.0 );
4023 4023 match(ConD);
4024 4024
4025 4025 op_cost(5);
4026 4026 format %{ %}
4027 4027 interface(CONST_INTER);
4028 4028 %}
4029 4029
4030 4030 // Double Immediate
4031 4031 operand immDPR() %{
4032 4032 predicate(UseSSE<=1);
4033 4033 match(ConD);
4034 4034
4035 4035 op_cost(5);
4036 4036 format %{ %}
4037 4037 interface(CONST_INTER);
4038 4038 %}
4039 4039
4040 4040 operand immD() %{
4041 4041 predicate(UseSSE>=2);
4042 4042 match(ConD);
4043 4043
4044 4044 op_cost(5);
4045 4045 format %{ %}
4046 4046 interface(CONST_INTER);
4047 4047 %}
4048 4048
4049 4049 // Double Immediate zero
4050 4050 operand immD0() %{
4051 4051 // Do additional (and counter-intuitive) test against NaN to work around VC++
4052 4052 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4053 4053 // compare equal to -0.0.
4054 4054 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4055 4055 match(ConD);
4056 4056
4057 4057 format %{ %}
4058 4058 interface(CONST_INTER);
4059 4059 %}
4060 4060
4061 4061 // Float Immediate zero
4062 4062 operand immFPR0() %{
4063 4063 predicate(UseSSE == 0 && n->getf() == 0.0F);
4064 4064 match(ConF);
4065 4065
4066 4066 op_cost(5);
4067 4067 format %{ %}
4068 4068 interface(CONST_INTER);
4069 4069 %}
4070 4070
4071 4071 // Float Immediate one
4072 4072 operand immFPR1() %{
4073 4073 predicate(UseSSE == 0 && n->getf() == 1.0F);
4074 4074 match(ConF);
4075 4075
4076 4076 op_cost(5);
4077 4077 format %{ %}
4078 4078 interface(CONST_INTER);
4079 4079 %}
4080 4080
4081 4081 // Float Immediate
4082 4082 operand immFPR() %{
4083 4083 predicate( UseSSE == 0 );
4084 4084 match(ConF);
4085 4085
4086 4086 op_cost(5);
4087 4087 format %{ %}
4088 4088 interface(CONST_INTER);
4089 4089 %}
4090 4090
4091 4091 // Float Immediate
4092 4092 operand immF() %{
4093 4093 predicate(UseSSE >= 1);
4094 4094 match(ConF);
4095 4095
4096 4096 op_cost(5);
4097 4097 format %{ %}
4098 4098 interface(CONST_INTER);
4099 4099 %}
4100 4100
4101 4101 // Float Immediate zero. Zero and not -0.0
4102 4102 operand immF0() %{
4103 4103 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4104 4104 match(ConF);
4105 4105
4106 4106 op_cost(5);
4107 4107 format %{ %}
4108 4108 interface(CONST_INTER);
4109 4109 %}
4110 4110
4111 4111 // Immediates for special shifts (sign extend)
4112 4112
4113 4113 // Constants for increment
4114 4114 operand immI_16() %{
4115 4115 predicate( n->get_int() == 16 );
4116 4116 match(ConI);
4117 4117
4118 4118 format %{ %}
4119 4119 interface(CONST_INTER);
4120 4120 %}
4121 4121
4122 4122 operand immI_24() %{
4123 4123 predicate( n->get_int() == 24 );
4124 4124 match(ConI);
4125 4125
4126 4126 format %{ %}
4127 4127 interface(CONST_INTER);
4128 4128 %}
4129 4129
4130 4130 // Constant for byte-wide masking
4131 4131 operand immI_255() %{
4132 4132 predicate( n->get_int() == 255 );
4133 4133 match(ConI);
4134 4134
4135 4135 format %{ %}
4136 4136 interface(CONST_INTER);
4137 4137 %}
4138 4138
4139 4139 // Constant for short-wide masking
4140 4140 operand immI_65535() %{
4141 4141 predicate(n->get_int() == 65535);
4142 4142 match(ConI);
4143 4143
4144 4144 format %{ %}
4145 4145 interface(CONST_INTER);
4146 4146 %}
4147 4147
4148 4148 // Register Operands
4149 4149 // Integer Register
4150 4150 operand eRegI() %{
4151 4151 constraint(ALLOC_IN_RC(e_reg));
4152 4152 match(RegI);
4153 4153 match(xRegI);
4154 4154 match(eAXRegI);
4155 4155 match(eBXRegI);
4156 4156 match(eCXRegI);
4157 4157 match(eDXRegI);
4158 4158 match(eDIRegI);
4159 4159 match(eSIRegI);
4160 4160
4161 4161 format %{ %}
4162 4162 interface(REG_INTER);
4163 4163 %}
4164 4164
4165 4165 // Subset of Integer Register
4166 4166 operand xRegI(eRegI reg) %{
4167 4167 constraint(ALLOC_IN_RC(x_reg));
4168 4168 match(reg);
4169 4169 match(eAXRegI);
4170 4170 match(eBXRegI);
4171 4171 match(eCXRegI);
4172 4172 match(eDXRegI);
4173 4173
4174 4174 format %{ %}
4175 4175 interface(REG_INTER);
4176 4176 %}
4177 4177
4178 4178 // Special Registers
4179 4179 operand eAXRegI(xRegI reg) %{
4180 4180 constraint(ALLOC_IN_RC(eax_reg));
4181 4181 match(reg);
4182 4182 match(eRegI);
4183 4183
4184 4184 format %{ "EAX" %}
4185 4185 interface(REG_INTER);
4186 4186 %}
4187 4187
4188 4188 // Special Registers
4189 4189 operand eBXRegI(xRegI reg) %{
4190 4190 constraint(ALLOC_IN_RC(ebx_reg));
4191 4191 match(reg);
4192 4192 match(eRegI);
4193 4193
4194 4194 format %{ "EBX" %}
4195 4195 interface(REG_INTER);
4196 4196 %}
4197 4197
4198 4198 operand eCXRegI(xRegI reg) %{
4199 4199 constraint(ALLOC_IN_RC(ecx_reg));
4200 4200 match(reg);
4201 4201 match(eRegI);
4202 4202
4203 4203 format %{ "ECX" %}
4204 4204 interface(REG_INTER);
4205 4205 %}
4206 4206
4207 4207 operand eDXRegI(xRegI reg) %{
4208 4208 constraint(ALLOC_IN_RC(edx_reg));
4209 4209 match(reg);
4210 4210 match(eRegI);
4211 4211
4212 4212 format %{ "EDX" %}
4213 4213 interface(REG_INTER);
4214 4214 %}
4215 4215
4216 4216 operand eDIRegI(xRegI reg) %{
4217 4217 constraint(ALLOC_IN_RC(edi_reg));
4218 4218 match(reg);
4219 4219 match(eRegI);
4220 4220
4221 4221 format %{ "EDI" %}
4222 4222 interface(REG_INTER);
4223 4223 %}
4224 4224
4225 4225 operand naxRegI() %{
4226 4226 constraint(ALLOC_IN_RC(nax_reg)); // "nax" = no-EAX: integer register other than EAX — TODO confirm against nax_reg class definition
4227 4227 match(RegI);
4228 4228 match(eCXRegI);
4229 4229 match(eDXRegI);
4230 4230 match(eSIRegI);
4231 4231 match(eDIRegI);
4232 4232 
4233 4233 format %{ %}
4234 4234 interface(REG_INTER);
4235 4235 %}
4236 4236
4237 4237 operand nadxRegI() %{
4238 4238 constraint(ALLOC_IN_RC(nadx_reg)); // "nadx" = no-EAX/EDX: integer register excluding EAX and EDX — TODO confirm against nadx_reg class definition
4239 4239 match(RegI);
4240 4240 match(eBXRegI);
4241 4241 match(eCXRegI);
4242 4242 match(eSIRegI);
4243 4243 match(eDIRegI);
4244 4244 
4245 4245 format %{ %}
4246 4246 interface(REG_INTER);
4247 4247 %}
4248 4248
4249 4249 operand ncxRegI() %{
4250 4250 constraint(ALLOC_IN_RC(ncx_reg)); // "ncx" = no-ECX: integer register other than ECX — TODO confirm against ncx_reg class definition
4251 4251 match(RegI);
4252 4252 match(eAXRegI);
4253 4253 match(eDXRegI);
4254 4254 match(eSIRegI);
4255 4255 match(eDIRegI);
4256 4256 
4257 4257 format %{ %}
4258 4258 interface(REG_INTER);
4259 4259 %}
4260 4260
4261 4261 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
4262 4262 // //
4263 4263 operand eSIRegI(xRegI reg) %{
4264 4264 constraint(ALLOC_IN_RC(esi_reg));
4265 4265 match(reg);
4266 4266 match(eRegI);
4267 4267
4268 4268 format %{ "ESI" %}
4269 4269 interface(REG_INTER);
4270 4270 %}
4271 4271
4272 4272 // Pointer Register
4273 4273 operand anyRegP() %{
4274 4274 constraint(ALLOC_IN_RC(any_reg));
4275 4275 match(RegP);
4276 4276 match(eAXRegP);
4277 4277 match(eBXRegP);
4278 4278 match(eCXRegP);
4279 4279 match(eDIRegP);
4280 4280 match(eRegP);
4281 4281
4282 4282 format %{ %}
4283 4283 interface(REG_INTER);
4284 4284 %}
4285 4285
4286 4286 operand eRegP() %{
4287 4287 constraint(ALLOC_IN_RC(e_reg));
4288 4288 match(RegP);
4289 4289 match(eAXRegP);
4290 4290 match(eBXRegP);
4291 4291 match(eCXRegP);
4292 4292 match(eDIRegP);
4293 4293
4294 4294 format %{ %}
4295 4295 interface(REG_INTER);
4296 4296 %}
4297 4297
4298 4298 // On windows95, EBP is not safe to use for implicit null tests.
4299 4299 operand eRegP_no_EBP() %{
4300 4300 constraint(ALLOC_IN_RC(e_reg_no_rbp));
4301 4301 match(RegP);
4302 4302 match(eAXRegP);
4303 4303 match(eBXRegP);
4304 4304 match(eCXRegP);
4305 4305 match(eDIRegP);
4306 4306
4307 4307 op_cost(100);
4308 4308 format %{ %}
4309 4309 interface(REG_INTER);
4310 4310 %}
4311 4311
4312 4312 operand naxRegP() %{
4313 4313 constraint(ALLOC_IN_RC(nax_reg)); // pointer flavor of "nax" (no-EAX) register class — TODO confirm against nax_reg class definition
4314 4314 match(RegP);
4315 4315 match(eBXRegP);
4316 4316 match(eDXRegP);
4317 4317 match(eCXRegP);
4318 4318 match(eSIRegP);
4319 4319 match(eDIRegP);
4320 4320 
4321 4321 format %{ %}
4322 4322 interface(REG_INTER);
4323 4323 %}
4324 4324
4325 4325 operand nabxRegP() %{
4326 4326 constraint(ALLOC_IN_RC(nabx_reg)); // "nabx" = no-EAX/EBX: pointer register excluding EAX and EBX — TODO confirm against nabx_reg class definition
4327 4327 match(RegP);
4328 4328 match(eCXRegP);
4329 4329 match(eDXRegP);
4330 4330 match(eSIRegP);
4331 4331 match(eDIRegP);
4332 4332 
4333 4333 format %{ %}
4334 4334 interface(REG_INTER);
4335 4335 %}
4336 4336
4337 4337 operand pRegP() %{
4338 4338 constraint(ALLOC_IN_RC(p_reg));
4339 4339 match(RegP);
4340 4340 match(eBXRegP);
4341 4341 match(eDXRegP);
4342 4342 match(eSIRegP);
4343 4343 match(eDIRegP);
4344 4344
4345 4345 format %{ %}
4346 4346 interface(REG_INTER);
4347 4347 %}
4348 4348
4349 4349 // Special Registers
4350 4350 // Return a pointer value
4351 4351 operand eAXRegP(eRegP reg) %{
4352 4352 constraint(ALLOC_IN_RC(eax_reg));
4353 4353 match(reg);
4354 4354 format %{ "EAX" %}
4355 4355 interface(REG_INTER);
4356 4356 %}
4357 4357
4358 4358 // Used in AtomicAdd
4359 4359 operand eBXRegP(eRegP reg) %{
4360 4360 constraint(ALLOC_IN_RC(ebx_reg));
4361 4361 match(reg);
4362 4362 format %{ "EBX" %}
4363 4363 interface(REG_INTER);
4364 4364 %}
4365 4365
4366 4366 // Tail-call (interprocedural jump) to interpreter
4367 4367 operand eCXRegP(eRegP reg) %{
4368 4368 constraint(ALLOC_IN_RC(ecx_reg));
4369 4369 match(reg);
4370 4370 format %{ "ECX" %}
4371 4371 interface(REG_INTER);
4372 4372 %}
4373 4373
4374 4374 operand eSIRegP(eRegP reg) %{
4375 4375 constraint(ALLOC_IN_RC(esi_reg));
4376 4376 match(reg);
4377 4377 format %{ "ESI" %}
4378 4378 interface(REG_INTER);
4379 4379 %}
4380 4380
4381 4381 // Used in rep stosw
4382 4382 operand eDIRegP(eRegP reg) %{
4383 4383 constraint(ALLOC_IN_RC(edi_reg));
4384 4384 match(reg);
4385 4385 format %{ "EDI" %}
4386 4386 interface(REG_INTER);
4387 4387 %}
4388 4388
4389 4389 operand eBPRegP() %{
4390 4390 constraint(ALLOC_IN_RC(ebp_reg));
4391 4391 match(RegP);
4392 4392 format %{ "EBP" %}
4393 4393 interface(REG_INTER);
4394 4394 %}
4395 4395
4396 4396 operand eRegL() %{
4397 4397 constraint(ALLOC_IN_RC(long_reg));
4398 4398 match(RegL);
4399 4399 match(eADXRegL);
4400 4400
4401 4401 format %{ %}
4402 4402 interface(REG_INTER);
4403 4403 %}
4404 4404
4405 4405 operand eADXRegL( eRegL reg ) %{
4406 4406 constraint(ALLOC_IN_RC(eadx_reg));
4407 4407 match(reg);
4408 4408
4409 4409 format %{ "EDX:EAX" %}
4410 4410 interface(REG_INTER);
4411 4411 %}
4412 4412
4413 4413 operand eBCXRegL( eRegL reg ) %{
4414 4414 constraint(ALLOC_IN_RC(ebcx_reg));
4415 4415 match(reg);
4416 4416
4417 4417 format %{ "EBX:ECX" %}
4418 4418 interface(REG_INTER);
4419 4419 %}
4420 4420
4421 4421 // Special case for integer high multiply
4422 4422 operand eADXRegL_low_only() %{
4423 4423 constraint(ALLOC_IN_RC(eadx_reg));
4424 4424 match(RegL);
4425 4425
4426 4426 format %{ "EAX" %}
4427 4427 interface(REG_INTER);
4428 4428 %}
4429 4429
4430 4430 // Flags register, used as output of compare instructions
4431 4431 operand eFlagsReg() %{
4432 4432 constraint(ALLOC_IN_RC(int_flags));
4433 4433 match(RegFlags);
4434 4434
4435 4435 format %{ "EFLAGS" %}
4436 4436 interface(REG_INTER);
4437 4437 %}
4438 4438
4439 4439 // Flags register, used as output of FLOATING POINT compare instructions
4440 4440 operand eFlagsRegU() %{
4441 4441 constraint(ALLOC_IN_RC(int_flags));
4442 4442 match(RegFlags);
4443 4443
4444 4444 format %{ "EFLAGS_U" %}
4445 4445 interface(REG_INTER);
4446 4446 %}
4447 4447
4448 4448 operand eFlagsRegUCF() %{
4449 4449 constraint(ALLOC_IN_RC(int_flags));
4450 4450 match(RegFlags);
4451 4451 predicate(false);
4452 4452
4453 4453 format %{ "EFLAGS_U_CF" %}
4454 4454 interface(REG_INTER);
4455 4455 %}
4456 4456
4457 4457 // Condition Code Register used by long compare
4458 4458 operand flagsReg_long_LTGE() %{
4459 4459 constraint(ALLOC_IN_RC(int_flags));
4460 4460 match(RegFlags);
4461 4461 format %{ "FLAGS_LTGE" %}
4462 4462 interface(REG_INTER);
4463 4463 %}
4464 4464 // Condition Code Register used by long compare (equal / not-equal)
4464 4464 operand flagsReg_long_EQNE() %{
4465 4465 constraint(ALLOC_IN_RC(int_flags));
4466 4466 match(RegFlags);
4467 4467 format %{ "FLAGS_EQNE" %}
4468 4468 interface(REG_INTER);
4469 4469 %}
4470 4470 // Condition Code Register used by long compare (less-equal / greater-than)
4470 4470 operand flagsReg_long_LEGT() %{
4471 4471 constraint(ALLOC_IN_RC(int_flags));
4472 4472 match(RegFlags);
4473 4473 format %{ "FLAGS_LEGT" %}
4474 4474 interface(REG_INTER);
4475 4475 %}
4476 4476
4477 4477 // Float register operands
4478 4478 operand regDPR() %{
4479 4479 predicate( UseSSE < 2 );
4480 4480 constraint(ALLOC_IN_RC(dbl_reg));
4481 4481 match(RegD);
4482 4482 match(regDPR1);
4483 4483 match(regDPR2);
4484 4484 format %{ %}
4485 4485 interface(REG_INTER);
4486 4486 %}
4487 4487
4488 4488 operand regDPR1(regDPR reg) %{
4489 4489 predicate( UseSSE < 2 );
4490 4490 constraint(ALLOC_IN_RC(dbl_reg0));
4491 4491 match(reg);
4492 4492 format %{ "FPR1" %}
4493 4493 interface(REG_INTER);
4494 4494 %}
4495 4495
4496 4496 operand regDPR2(regDPR reg) %{
4497 4497 predicate( UseSSE < 2 );
4498 4498 constraint(ALLOC_IN_RC(dbl_reg1));
4499 4499 match(reg);
4500 4500 format %{ "FPR2" %}
4501 4501 interface(REG_INTER);
4502 4502 %}
4503 4503
4504 4504 operand regnotDPR1(regDPR reg) %{
4505 4505 predicate( UseSSE < 2 );
4506 4506 constraint(ALLOC_IN_RC(dbl_notreg0));
4507 4507 match(reg);
4508 4508 format %{ %}
4509 4509 interface(REG_INTER);
4510 4510 %}
4511 4511
4512 4512 // XMM Double register operands
4513 4513 operand regD() %{
4514 4514 predicate( UseSSE>=2 );
4515 4515 constraint(ALLOC_IN_RC(xdb_reg));
4516 4516 match(RegD);
4517 4517 match(regD6);
4518 4518 match(regD7);
4519 4519 format %{ %}
4520 4520 interface(REG_INTER);
4521 4521 %}
4522 4522
4523 4523 // XMM6 double register operands
4524 4524 operand regD6(regD reg) %{
4525 4525 predicate( UseSSE>=2 );
4526 4526 constraint(ALLOC_IN_RC(xdb_reg6));
4527 4527 match(reg);
4528 4528 format %{ "XMM6" %}
4529 4529 interface(REG_INTER);
4530 4530 %}
4531 4531
4532 4532 // XMM7 double register operands
4533 4533 operand regD7(regD reg) %{
4534 4534 predicate( UseSSE>=2 );
4535 4535 constraint(ALLOC_IN_RC(xdb_reg7));
4536 4536 match(reg);
4537 4537 format %{ "XMM7" %}
4538 4538 interface(REG_INTER);
4539 4539 %}
4540 4540
4541 4541 // Float register operands
4542 4542 operand regFPR() %{
4543 4543 predicate( UseSSE < 2 );
4544 4544 constraint(ALLOC_IN_RC(flt_reg));
4545 4545 match(RegF);
4546 4546 match(regFPR1);
4547 4547 format %{ %}
4548 4548 interface(REG_INTER);
4549 4549 %}
4550 4550
4551 4551 // Float register operands
4552 4552 operand regFPR1(regFPR reg) %{
4553 4553 predicate( UseSSE < 2 );
4554 4554 constraint(ALLOC_IN_RC(flt_reg0));
4555 4555 match(reg);
4556 4556 format %{ "FPR1" %}
4557 4557 interface(REG_INTER);
4558 4558 %}
4559 4559
4560 4560 // XMM register operands
4561 4561 operand regF() %{
4562 4562 predicate( UseSSE>=1 );
4563 4563 constraint(ALLOC_IN_RC(xmm_reg));
4564 4564 match(RegF);
4565 4565 format %{ %}
4566 4566 interface(REG_INTER);
4567 4567 %}
4568 4568
4569 4569
4570 4570 //----------Memory Operands----------------------------------------------------
4571 4571 // Direct Memory Operand
4572 4572 operand direct(immP addr) %{
4573 4573 match(addr);
4574 4574
4575 4575 format %{ "[$addr]" %}
4576 4576 interface(MEMORY_INTER) %{
4577 4577 base(0xFFFFFFFF);
4578 4578 index(0x4);
4579 4579 scale(0x0);
4580 4580 disp($addr);
4581 4581 %}
4582 4582 %}
4583 4583
4584 4584 // Indirect Memory Operand
4585 4585 operand indirect(eRegP reg) %{
4586 4586 constraint(ALLOC_IN_RC(e_reg));
4587 4587 match(reg);
4588 4588
4589 4589 format %{ "[$reg]" %}
4590 4590 interface(MEMORY_INTER) %{
4591 4591 base($reg);
4592 4592 index(0x4);
4593 4593 scale(0x0);
4594 4594 disp(0x0);
4595 4595 %}
4596 4596 %}
4597 4597
4598 4598 // Indirect Memory Plus Short Offset Operand
4599 4599 operand indOffset8(eRegP reg, immI8 off) %{
4600 4600 match(AddP reg off);
4601 4601
4602 4602 format %{ "[$reg + $off]" %}
4603 4603 interface(MEMORY_INTER) %{
4604 4604 base($reg);
4605 4605 index(0x4);
4606 4606 scale(0x0);
4607 4607 disp($off);
4608 4608 %}
4609 4609 %}
4610 4610
4611 4611 // Indirect Memory Plus Long Offset Operand
4612 4612 operand indOffset32(eRegP reg, immI off) %{
4613 4613 match(AddP reg off);
4614 4614
4615 4615 format %{ "[$reg + $off]" %}
4616 4616 interface(MEMORY_INTER) %{
4617 4617 base($reg);
4618 4618 index(0x4);
4619 4619 scale(0x0);
4620 4620 disp($off);
4621 4621 %}
4622 4622 %}
4623 4623
4624 4624 // Indirect Memory Plus Long Offset Operand
4625 4625 operand indOffset32X(eRegI reg, immP off) %{
4626 4626 match(AddP off reg);
4627 4627
4628 4628 format %{ "[$reg + $off]" %}
4629 4629 interface(MEMORY_INTER) %{
4630 4630 base($reg);
4631 4631 index(0x4);
4632 4632 scale(0x0);
4633 4633 disp($off);
4634 4634 %}
4635 4635 %}
4636 4636
4637 4637 // Indirect Memory Plus Index Register Plus Offset Operand
4638 4638 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
4639 4639 match(AddP (AddP reg ireg) off);
4640 4640
4641 4641 op_cost(10);
4642 4642 format %{"[$reg + $off + $ireg]" %}
4643 4643 interface(MEMORY_INTER) %{
4644 4644 base($reg);
4645 4645 index($ireg);
4646 4646 scale(0x0);
4647 4647 disp($off);
4648 4648 %}
4649 4649 %}
4650 4650
4651 4651 // Indirect Memory Plus Index Register Plus Offset Operand
4652 4652 operand indIndex(eRegP reg, eRegI ireg) %{
4653 4653 match(AddP reg ireg);
4654 4654
4655 4655 op_cost(10);
4656 4656 format %{"[$reg + $ireg]" %}
4657 4657 interface(MEMORY_INTER) %{
4658 4658 base($reg);
4659 4659 index($ireg);
4660 4660 scale(0x0);
4661 4661 disp(0x0);
4662 4662 %}
4663 4663 %}
4664 4664
4665 4665 // // -------------------------------------------------------------------------
4666 4666 // // 486 architecture doesn't support "scale * index + offset" without a base
4667 4667 // // -------------------------------------------------------------------------
4668 4668 // // Scaled Memory Operands
4669 4669 // // Indirect Memory Times Scale Plus Offset Operand
4670 4670 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
4671 4671 // match(AddP off (LShiftI ireg scale));
4672 4672 //
4673 4673 // op_cost(10);
4674 4674 // format %{"[$off + $ireg << $scale]" %}
4675 4675 // interface(MEMORY_INTER) %{
4676 4676 // base(0x4);
4677 4677 // index($ireg);
4678 4678 // scale($scale);
4679 4679 // disp($off);
4680 4680 // %}
4681 4681 // %}
4682 4682
4683 4683 // Indirect Memory Times Scale Plus Index Register
4684 4684 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
4685 4685 match(AddP reg (LShiftI ireg scale));
4686 4686
4687 4687 op_cost(10);
4688 4688 format %{"[$reg + $ireg << $scale]" %}
4689 4689 interface(MEMORY_INTER) %{
4690 4690 base($reg);
4691 4691 index($ireg);
4692 4692 scale($scale);
4693 4693 disp(0x0);
4694 4694 %}
4695 4695 %}
4696 4696
4697 4697 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4698 4698 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
4699 4699 match(AddP (AddP reg (LShiftI ireg scale)) off);
4700 4700
4701 4701 op_cost(10);
4702 4702 format %{"[$reg + $off + $ireg << $scale]" %}
4703 4703 interface(MEMORY_INTER) %{
4704 4704 base($reg);
4705 4705 index($ireg);
4706 4706 scale($scale);
4707 4707 disp($off);
4708 4708 %}
4709 4709 %}
4710 4710
4711 4711 //----------Load Long Memory Operands------------------------------------------
4712 4712 // The load-long idiom will use its address expression again after loading
4713 4713 // the first word of the long. If the load-long destination overlaps with
4714 4714 // registers used in the addressing expression, the 2nd half will be loaded
4715 4715 // from a clobbered address. Fix this by requiring that load-long use
4716 4716 // address registers that do not overlap with the load-long target.
4717 4717
4718 4718 // load-long support
4719 4719 operand load_long_RegP() %{
4720 4720 constraint(ALLOC_IN_RC(esi_reg));
4721 4721 match(RegP);
4722 4722 match(eSIRegP);
4723 4723 op_cost(100);
4724 4724 format %{ %}
4725 4725 interface(REG_INTER);
4726 4726 %}
4727 4727
4728 4728 // Indirect Memory Operand Long
4729 4729 operand load_long_indirect(load_long_RegP reg) %{
4730 4730 constraint(ALLOC_IN_RC(esi_reg));
4731 4731 match(reg);
4732 4732
4733 4733 format %{ "[$reg]" %}
4734 4734 interface(MEMORY_INTER) %{
4735 4735 base($reg);
4736 4736 index(0x4);
4737 4737 scale(0x0);
4738 4738 disp(0x0);
4739 4739 %}
4740 4740 %}
4741 4741
4742 4742 // Indirect Memory Plus Long Offset Operand
4743 4743 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4744 4744 match(AddP reg off);
4745 4745
4746 4746 format %{ "[$reg + $off]" %}
4747 4747 interface(MEMORY_INTER) %{
4748 4748 base($reg);
4749 4749 index(0x4);
4750 4750 scale(0x0);
4751 4751 disp($off);
4752 4752 %}
4753 4753 %}
4754 4754
4755 4755 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4756 4756
4757 4757
4758 4758 //----------Special Memory Operands--------------------------------------------
4759 4759 // Stack Slot Operand - This operand is used for loading and storing temporary
4760 4760 // values on the stack where a match requires a value to
4761 4761 // flow through memory.
4762 4762 operand stackSlotP(sRegP reg) %{
4763 4763 constraint(ALLOC_IN_RC(stack_slots));
4764 4764 // No match rule because this operand is only generated in matching
4765 4765 format %{ "[$reg]" %}
4766 4766 interface(MEMORY_INTER) %{
4767 4767 base(0x4); // ESP
4768 4768 index(0x4); // No Index
4769 4769 scale(0x0); // No Scale
4770 4770 disp($reg); // Stack Offset
4771 4771 %}
4772 4772 %}
4773 4773
4774 4774 operand stackSlotI(sRegI reg) %{
4775 4775 constraint(ALLOC_IN_RC(stack_slots));
4776 4776 // No match rule because this operand is only generated in matching
4777 4777 format %{ "[$reg]" %}
4778 4778 interface(MEMORY_INTER) %{
4779 4779 base(0x4); // ESP
4780 4780 index(0x4); // No Index
4781 4781 scale(0x0); // No Scale
4782 4782 disp($reg); // Stack Offset
4783 4783 %}
4784 4784 %}
4785 4785
4786 4786 operand stackSlotF(sRegF reg) %{
4787 4787 constraint(ALLOC_IN_RC(stack_slots));
4788 4788 // No match rule because this operand is only generated in matching
4789 4789 format %{ "[$reg]" %}
4790 4790 interface(MEMORY_INTER) %{
4791 4791 base(0x4); // ESP
4792 4792 index(0x4); // No Index
4793 4793 scale(0x0); // No Scale
4794 4794 disp($reg); // Stack Offset
4795 4795 %}
4796 4796 %}
4797 4797
4798 4798 operand stackSlotD(sRegD reg) %{
4799 4799 constraint(ALLOC_IN_RC(stack_slots));
4800 4800 // No match rule because this operand is only generated in matching
4801 4801 format %{ "[$reg]" %}
4802 4802 interface(MEMORY_INTER) %{
4803 4803 base(0x4); // ESP
4804 4804 index(0x4); // No Index
4805 4805 scale(0x0); // No Scale
4806 4806 disp($reg); // Stack Offset
4807 4807 %}
4808 4808 %}
4809 4809
4810 4810 operand stackSlotL(sRegL reg) %{
4811 4811 constraint(ALLOC_IN_RC(stack_slots));
4812 4812 // No match rule because this operand is only generated in matching
4813 4813 format %{ "[$reg]" %}
4814 4814 interface(MEMORY_INTER) %{
4815 4815 base(0x4); // ESP
4816 4816 index(0x4); // No Index
4817 4817 scale(0x0); // No Scale
4818 4818 disp($reg); // Stack Offset
4819 4819 %}
4820 4820 %}
4821 4821
4822 4822 //----------Memory Operands - Win95 Implicit Null Variants----------------
4823 4823 // Indirect Memory Operand
4824 4824 operand indirect_win95_safe(eRegP_no_EBP reg)
4825 4825 %{
4826 4826 constraint(ALLOC_IN_RC(e_reg));
4827 4827 match(reg);
4828 4828
4829 4829 op_cost(100);
4830 4830 format %{ "[$reg]" %}
4831 4831 interface(MEMORY_INTER) %{
4832 4832 base($reg);
4833 4833 index(0x4);
4834 4834 scale(0x0);
4835 4835 disp(0x0);
4836 4836 %}
4837 4837 %}
4838 4838
4839 4839 // Indirect Memory Plus Short Offset Operand
4840 4840 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4841 4841 %{
4842 4842 match(AddP reg off);
4843 4843
4844 4844 op_cost(100);
4845 4845 format %{ "[$reg + $off]" %}
4846 4846 interface(MEMORY_INTER) %{
4847 4847 base($reg);
4848 4848 index(0x4);
4849 4849 scale(0x0);
4850 4850 disp($off);
4851 4851 %}
4852 4852 %}
4853 4853
4854 4854 // Indirect Memory Plus Long Offset Operand
4855 4855 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4856 4856 %{
4857 4857 match(AddP reg off);
4858 4858
4859 4859 op_cost(100);
4860 4860 format %{ "[$reg + $off]" %}
4861 4861 interface(MEMORY_INTER) %{
4862 4862 base($reg);
4863 4863 index(0x4);
4864 4864 scale(0x0);
4865 4865 disp($off);
4866 4866 %}
4867 4867 %}
4868 4868
4869 4869 // Indirect Memory Plus Index Register Plus Offset Operand
4870 4870 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
4871 4871 %{
4872 4872 match(AddP (AddP reg ireg) off);
4873 4873
4874 4874 op_cost(100);
4875 4875 format %{"[$reg + $off + $ireg]" %}
4876 4876 interface(MEMORY_INTER) %{
4877 4877 base($reg);
4878 4878 index($ireg);
4879 4879 scale(0x0);
4880 4880 disp($off);
4881 4881 %}
4882 4882 %}
4883 4883
4884 4884 // Indirect Memory Times Scale Plus Index Register
4885 4885 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
4886 4886 %{
4887 4887 match(AddP reg (LShiftI ireg scale));
4888 4888
4889 4889 op_cost(100);
4890 4890 format %{"[$reg + $ireg << $scale]" %}
4891 4891 interface(MEMORY_INTER) %{
4892 4892 base($reg);
4893 4893 index($ireg);
4894 4894 scale($scale);
4895 4895 disp(0x0);
4896 4896 %}
4897 4897 %}
4898 4898
4899 4899 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4900 4900 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
4901 4901 %{
4902 4902 match(AddP (AddP reg (LShiftI ireg scale)) off);
4903 4903
4904 4904 op_cost(100);
4905 4905 format %{"[$reg + $off + $ireg << $scale]" %}
4906 4906 interface(MEMORY_INTER) %{
4907 4907 base($reg);
4908 4908 index($ireg);
4909 4909 scale($scale);
4910 4910 disp($off);
4911 4911 %}
4912 4912 %}
4913 4913
4914 4914 //----------Conditional Branch Operands----------------------------------------
4915 4915 // Comparison Op - This is the operation of the comparison, and is limited to
4916 4916 // the following set of codes:
4917 4917 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4918 4918 //
4919 4919 // Other attributes of the comparison, such as unsignedness, are specified
4920 4920 // by the comparison instruction that sets a condition code flags register.
4921 4921 // That result is represented by a flags operand whose subtype is appropriate
4922 4922 // to the unsignedness (etc.) of the comparison.
4923 4923 //
4924 4924 // Later, the instruction which matches both the Comparison Op (a Bool) and
4925 4925 // the flags (produced by the Cmp) specifies the coding of the comparison op
4926 4926 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4927 4927
4928 4928 // Comparison Code
4929 4929 operand cmpOp() %{
4930 4930 match(Bool);
4931 4931
4932 4932 format %{ "" %}
4933 4933 interface(COND_INTER) %{
4934 4934 equal(0x4, "e");
4935 4935 not_equal(0x5, "ne");
4936 4936 less(0xC, "l");
4937 4937 greater_equal(0xD, "ge");
4938 4938 less_equal(0xE, "le");
4939 4939 greater(0xF, "g");
4940 4940 %}
4941 4941 %}
4942 4942
4943 4943 // Comparison Code, unsigned compare. Used by FP also, with
4944 4944 // C2 (unordered) turned into GT or LT already. The other bits
4945 4945 // C0 and C3 are turned into Carry & Zero flags.
4946 4946 operand cmpOpU() %{
4947 4947 match(Bool);
4948 4948
4949 4949 format %{ "" %}
4950 4950 interface(COND_INTER) %{
4951 4951 equal(0x4, "e");
4952 4952 not_equal(0x5, "ne");
4953 4953 less(0x2, "b");
4954 4954 greater_equal(0x3, "nb");
4955 4955 less_equal(0x6, "be");
4956 4956 greater(0x7, "nbe");
4957 4957 %}
4958 4958 %}
4959 4959
4960 4960 // Floating comparisons that don't require any fixup for the unordered case
4961 4961 operand cmpOpUCF() %{
4962 4962 match(Bool);
4963 4963 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4964 4964 n->as_Bool()->_test._test == BoolTest::ge ||
4965 4965 n->as_Bool()->_test._test == BoolTest::le ||
4966 4966 n->as_Bool()->_test._test == BoolTest::gt);
4967 4967 format %{ "" %}
4968 4968 interface(COND_INTER) %{
4969 4969 equal(0x4, "e");
4970 4970 not_equal(0x5, "ne");
4971 4971 less(0x2, "b");
4972 4972 greater_equal(0x3, "nb");
4973 4973 less_equal(0x6, "be");
4974 4974 greater(0x7, "nbe");
4975 4975 %}
4976 4976 %}
4977 4977
4978 4978
4979 4979 // Floating comparisons that can be fixed up with extra conditional jumps
4980 4980 operand cmpOpUCF2() %{
4981 4981 match(Bool);
4982 4982 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4983 4983 n->as_Bool()->_test._test == BoolTest::eq);
4984 4984 format %{ "" %}
4985 4985 interface(COND_INTER) %{
4986 4986 equal(0x4, "e");
4987 4987 not_equal(0x5, "ne");
4988 4988 less(0x2, "b");
4989 4989 greater_equal(0x3, "nb");
4990 4990 less_equal(0x6, "be");
4991 4991 greater(0x7, "nbe");
4992 4992 %}
4993 4993 %}
4994 4994
4995 4995 // Comparison Code for FP conditional move
4996 4996 operand cmpOp_fcmov() %{
4997 4997 match(Bool);
4998 4998
4999 4999 format %{ "" %}
5000 5000 interface(COND_INTER) %{
5001 5001 equal (0x0C8);
5002 5002 not_equal (0x1C8);
5003 5003 less (0x0C0);
5004 5004 greater_equal(0x1C0);
5005 5005 less_equal (0x0D0);
5006 5006 greater (0x1D0);
5007 5007 %}
5008 5008 %}
5009 5009
5010 5010 // Comparison Code used in long compares
5011 5011 operand cmpOp_commute() %{
5012 5012 match(Bool);
5013 5013
5014 5014 format %{ "" %}
5015 5015 interface(COND_INTER) %{
5016 5016 equal(0x4, "e");
5017 5017 not_equal(0x5, "ne");
5018 5018 less(0xF, "g");
5019 5019 greater_equal(0xE, "le");
5020 5020 less_equal(0xD, "ge");
5021 5021 greater(0xC, "l");
5022 5022 %}
5023 5023 %}
5024 5024
5025 5025 //----------OPERAND CLASSES----------------------------------------------------
5026 5026 // Operand Classes are groups of operands that are used to simplify
5027 5027 // instruction definitions by not requiring the AD writer to specify separate
5028 5028 // instructions for every form of operand when the instruction accepts
5029 5029 // multiple operand types with the same basic encoding and format. The classic
5030 5030 // case of this is memory operands.
5031 5031
5032 5032 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5033 5033 indIndex, indIndexScale, indIndexScaleOffset);
5034 5034
5035 5035 // Long memory operations are encoded in 2 instructions and a +4 offset.
5036 5036 // This means some kind of offset is always required and you cannot use
5037 5037 // an oop as the offset (done when working on static globals).
5038 5038 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5039 5039 indIndex, indIndexScale, indIndexScaleOffset);
5040 5040
5041 5041
5042 5042 //----------PIPELINE-----------------------------------------------------------
5043 5043 // Rules which define the behavior of the target architectures pipeline.
5044 5044 pipeline %{
5045 5045
5046 5046 //----------ATTRIBUTES---------------------------------------------------------
5047 5047 attributes %{
5048 5048 variable_size_instructions; // Variable size instructions
5049 5049 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5050 5050 instruction_unit_size = 1; // An instruction is 1 byte long
5051 5051 instruction_fetch_unit_size = 16; // The processor fetches one line
5052 5052 instruction_fetch_units = 1; // of 16 bytes
5053 5053
5054 5054 // List of nop instructions
5055 5055 nops( MachNop );
5056 5056 %}
5057 5057
5058 5058 //----------RESOURCES----------------------------------------------------------
5059 5059 // Resources are the functional units available to the machine
5060 5060
5061 5061 // Generic P2/P3 pipeline
5062 5062 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5063 5063 // 3 instructions decoded per cycle.
5064 5064 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5065 5065 // 2 ALU op, only ALU0 handles mul/div instructions.
5066 5066 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5067 5067 MS0, MS1, MEM = MS0 | MS1,
5068 5068 BR, FPU,
5069 5069 ALU0, ALU1, ALU = ALU0 | ALU1 );
5070 5070
5071 5071 //----------PIPELINE DESCRIPTION-----------------------------------------------
5072 5072 // Pipeline Description specifies the stages in the machine's pipeline
5073 5073
5074 5074 // Generic P2/P3 pipeline
5075 5075 pipe_desc(S0, S1, S2, S3, S4, S5);
5076 5076
5077 5077 //----------PIPELINE CLASSES---------------------------------------------------
5078 5078 // Pipeline Classes describe the stages in which input and output are
5079 5079 // referenced by the hardware pipeline.
5080 5080
5081 5081 // Naming convention: ialu or fpu
5082 5082 // Then: _reg
5083 5083 // Then: _reg if there is a 2nd register
5084 5084 // Then: _long if it's a pair of instructions implementing a long
5085 5085 // Then: _fat if it requires the big decoder
5086 5086 // Or: _mem if it requires the big decoder and a memory unit.
5087 5087
5088 5088 // Integer ALU reg operation
5089 5089 pipe_class ialu_reg(eRegI dst) %{
5090 5090 single_instruction;
5091 5091 dst : S4(write);
5092 5092 dst : S3(read);
5093 5093 DECODE : S0; // any decoder
5094 5094 ALU : S3; // any alu
5095 5095 %}
5096 5096
5097 5097 // Long ALU reg operation
5098 5098 pipe_class ialu_reg_long(eRegL dst) %{
5099 5099 instruction_count(2);
5100 5100 dst : S4(write);
5101 5101 dst : S3(read);
5102 5102 DECODE : S0(2); // any 2 decoders
5103 5103 ALU : S3(2); // both alus
5104 5104 %}
5105 5105
5106 5106 // Integer ALU reg operation using big decoder
5107 5107 pipe_class ialu_reg_fat(eRegI dst) %{
5108 5108 single_instruction;
5109 5109 dst : S4(write);
5110 5110 dst : S3(read);
5111 5111 D0 : S0; // big decoder only
5112 5112 ALU : S3; // any alu
5113 5113 %}
5114 5114
5115 5115 // Long ALU reg operation using big decoder
5116 5116 pipe_class ialu_reg_long_fat(eRegL dst) %{
5117 5117 instruction_count(2);
5118 5118 dst : S4(write);
5119 5119 dst : S3(read);
5120 5120 D0 : S0(2); // big decoder only; twice
5121 5121 ALU : S3(2); // any 2 alus
5122 5122 %}
5123 5123
5124 5124 // Integer ALU reg-reg operation
5125 5125 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5126 5126 single_instruction;
5127 5127 dst : S4(write);
5128 5128 src : S3(read);
5129 5129 DECODE : S0; // any decoder
5130 5130 ALU : S3; // any alu
5131 5131 %}
5132 5132
5133 5133 // Long ALU reg-reg operation
5134 5134 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5135 5135 instruction_count(2);
5136 5136 dst : S4(write);
5137 5137 src : S3(read);
5138 5138 DECODE : S0(2); // any 2 decoders
5139 5139 ALU : S3(2); // both alus
5140 5140 %}
5141 5141
5142 5142 // Integer ALU reg-reg operation
5143 5143 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5144 5144 single_instruction;
5145 5145 dst : S4(write);
5146 5146 src : S3(read);
5147 5147 D0 : S0; // big decoder only
5148 5148 ALU : S3; // any alu
5149 5149 %}
5150 5150
5151 5151 // Long ALU reg-reg operation
5152 5152 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5153 5153 instruction_count(2);
5154 5154 dst : S4(write);
5155 5155 src : S3(read);
5156 5156 D0 : S0(2); // big decoder only; twice
5157 5157 ALU : S3(2); // both alus
5158 5158 %}
5159 5159
5160 5160 // Integer ALU reg-mem operation
5161 5161 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5162 5162 single_instruction;
5163 5163 dst : S5(write);
5164 5164 mem : S3(read);
5165 5165 D0 : S0; // big decoder only
5166 5166 ALU : S4; // any alu
5167 5167 MEM : S3; // any mem
5168 5168 %}
5169 5169
5170 5170 // Long ALU reg-mem operation
5171 5171 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5172 5172 instruction_count(2);
5173 5173 dst : S5(write);
5174 5174 mem : S3(read);
5175 5175 D0 : S0(2); // big decoder only; twice
5176 5176 ALU : S4(2); // any 2 alus
5177 5177 MEM : S3(2); // both mems
5178 5178 %}
5179 5179
5180 5180 // Integer mem operation (prefetch)
5181 5181 pipe_class ialu_mem(memory mem)
5182 5182 %{
5183 5183 single_instruction;
5184 5184 mem : S3(read);
5185 5185 D0 : S0; // big decoder only
5186 5186 MEM : S3; // any mem
5187 5187 %}
5188 5188
5189 5189 // Integer Store to Memory
5190 5190 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5191 5191 single_instruction;
5192 5192 mem : S3(read);
5193 5193 src : S5(read);
5194 5194 D0 : S0; // big decoder only
5195 5195 ALU : S4; // any alu
5196 5196 MEM : S3;
5197 5197 %}
5198 5198
5199 5199 // Long Store to Memory
5200 5200 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5201 5201 instruction_count(2);
5202 5202 mem : S3(read);
5203 5203 src : S5(read);
5204 5204 D0 : S0(2); // big decoder only; twice
5205 5205 ALU : S4(2); // any 2 alus
5206 5206 MEM : S3(2); // Both mems
5207 5207 %}
5208 5208
5209 5209 // Integer Store to Memory
5210 5210 pipe_class ialu_mem_imm(memory mem) %{
5211 5211 single_instruction;
5212 5212 mem : S3(read);
5213 5213 D0 : S0; // big decoder only
5214 5214 ALU : S4; // any alu
5215 5215 MEM : S3;
5216 5216 %}
5217 5217
5218 5218 // Integer ALU0 reg-reg operation
5219 5219 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5220 5220 single_instruction;
5221 5221 dst : S4(write);
5222 5222 src : S3(read);
5223 5223 D0 : S0; // Big decoder only
5224 5224 ALU0 : S3; // only alu0
5225 5225 %}
5226 5226
5227 5227 // Integer ALU0 reg-mem operation
5228 5228 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
5229 5229 single_instruction;
5230 5230 dst : S5(write);
5231 5231 mem : S3(read);
5232 5232 D0 : S0; // big decoder only
5233 5233 ALU0 : S4; // ALU0 only
5234 5234 MEM : S3; // any mem
5235 5235 %}
5236 5236
5237 5237 // Integer ALU reg-reg operation
5238 5238 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
5239 5239 single_instruction;
5240 5240 cr : S4(write);
5241 5241 src1 : S3(read);
5242 5242 src2 : S3(read);
5243 5243 DECODE : S0; // any decoder
5244 5244 ALU : S3; // any alu
5245 5245 %}
5246 5246
5247 5247 // Integer ALU reg-imm operation
5248 5248 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
5249 5249 single_instruction;
5250 5250 cr : S4(write);
5251 5251 src1 : S3(read);
5252 5252 DECODE : S0; // any decoder
5253 5253 ALU : S3; // any alu
5254 5254 %}
5255 5255
5256 5256 // Integer ALU reg-mem operation
5257 5257 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
5258 5258 single_instruction;
5259 5259 cr : S4(write);
5260 5260 src1 : S3(read);
5261 5261 src2 : S3(read);
5262 5262 D0 : S0; // big decoder only
5263 5263 ALU : S4; // any alu
5264 5264 MEM : S3;
5265 5265 %}
5266 5266
5267 5267 // Conditional move reg-reg
5268 5268 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
5269 5269 instruction_count(4);
5270 5270 y : S4(read);
5271 5271 q : S3(read);
5272 5272 p : S3(read);
5273 5273 DECODE : S0(4); // any decoder
5274 5274 %}
5275 5275
5276 5276 // Conditional move reg-reg
5277 5277 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
5278 5278 single_instruction;
5279 5279 dst : S4(write);
5280 5280 src : S3(read);
5281 5281 cr : S3(read);
5282 5282 DECODE : S0; // any decoder
5283 5283 %}
5284 5284
5285 5285 // Conditional move reg-mem
5286 5286 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5287 5287 single_instruction;
5288 5288 dst : S4(write);
5289 5289 src : S3(read);
5290 5290 cr : S3(read);
5291 5291 DECODE : S0; // any decoder
5292 5292 MEM : S3;
5293 5293 %}
5294 5294
5295 5295 // Conditional move reg-reg long
5296 5296 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5297 5297 single_instruction;
5298 5298 dst : S4(write);
5299 5299 src : S3(read);
5300 5300 cr : S3(read);
5301 5301 DECODE : S0(2); // any 2 decoders
5302 5302 %}
5303 5303
5304 5304 // Conditional move double reg-reg
5305 5305 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
5306 5306 single_instruction;
5307 5307 dst : S4(write);
5308 5308 src : S3(read);
5309 5309 cr : S3(read);
5310 5310 DECODE : S0; // any decoder
5311 5311 %}
5312 5312
5313 5313 // Float reg-reg operation
5314 5314 pipe_class fpu_reg(regDPR dst) %{
5315 5315 instruction_count(2);
5316 5316 dst : S3(read);
5317 5317 DECODE : S0(2); // any 2 decoders
5318 5318 FPU : S3;
5319 5319 %}
5320 5320
5321 5321 // Float reg-reg operation
5322 5322 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
5323 5323 instruction_count(2);
5324 5324 dst : S4(write);
5325 5325 src : S3(read);
5326 5326 DECODE : S0(2); // any 2 decoders
5327 5327 FPU : S3;
5328 5328 %}
5329 5329
5330 5330 // Float reg-reg operation
5331 5331 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
5332 5332 instruction_count(3);
5333 5333 dst : S4(write);
5334 5334 src1 : S3(read);
5335 5335 src2 : S3(read);
5336 5336 DECODE : S0(3); // any 3 decoders
5337 5337 FPU : S3(2);
5338 5338 %}
5339 5339
5340 5340 // Float reg-reg operation
5341 5341 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5342 5342 instruction_count(4);
5343 5343 dst : S4(write);
5344 5344 src1 : S3(read);
5345 5345 src2 : S3(read);
5346 5346 src3 : S3(read);
5347 5347 DECODE : S0(4); // any 4 decoders
5348 5348 FPU : S3(2);
5349 5349 %}
5350 5350
5351 5351 // Float reg-reg operation
5352 5352 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5353 5353 instruction_count(4);
5354 5354 dst : S4(write);
5355 5355 src1 : S3(read);
5356 5356 src2 : S3(read);
5357 5357 src3 : S3(read);
5358 5358 DECODE : S1(3); // any 3 decoders
5359 5359 D0 : S0; // Big decoder only
5360 5360 FPU : S3(2);
5361 5361 MEM : S3;
5362 5362 %}
5363 5363
5364 5364 // Float reg-mem operation
5365 5365 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5366 5366 instruction_count(2);
5367 5367 dst : S5(write);
5368 5368 mem : S3(read);
5369 5369 D0 : S0; // big decoder only
5370 5370 DECODE : S1; // any decoder for FPU POP
5371 5371 FPU : S4;
5372 5372 MEM : S3; // any mem
5373 5373 %}
5374 5374
5375 5375 // Float reg-mem operation
5376 5376 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5377 5377 instruction_count(3);
5378 5378 dst : S5(write);
5379 5379 src1 : S3(read);
5380 5380 mem : S3(read);
5381 5381 D0 : S0; // big decoder only
5382 5382 DECODE : S1(2); // any decoder for FPU POP
5383 5383 FPU : S4;
5384 5384 MEM : S3; // any mem
5385 5385 %}
5386 5386
5387 5387 // Float mem-reg operation
5388 5388 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5389 5389 instruction_count(2);
5390 5390 src : S5(read);
5391 5391 mem : S3(read);
5392 5392 DECODE : S0; // any decoder for FPU PUSH
5393 5393 D0 : S1; // big decoder only
5394 5394 FPU : S4;
5395 5395 MEM : S3; // any mem
5396 5396 %}
5397 5397
5398 5398 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5399 5399 instruction_count(3);
5400 5400 src1 : S3(read);
5401 5401 src2 : S3(read);
5402 5402 mem : S3(read);
5403 5403 DECODE : S0(2); // any decoder for FPU PUSH
5404 5404 D0 : S1; // big decoder only
5405 5405 FPU : S4;
5406 5406 MEM : S3; // any mem
5407 5407 %}
5408 5408
5409 5409 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5410 5410 instruction_count(3);
5411 5411 src1 : S3(read);
5412 5412 src2 : S3(read);
5413 5413 mem : S4(read);
5414 5414 DECODE : S0; // any decoder for FPU PUSH
5415 5415 D0 : S0(2); // big decoder only
5416 5416 FPU : S4;
5417 5417 MEM : S3(2); // any mem
5418 5418 %}
5419 5419
5420 5420 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5421 5421 instruction_count(2);
5422 5422 src1 : S3(read);
5423 5423 dst : S4(read);
5424 5424 D0 : S0(2); // big decoder only
5425 5425 MEM : S3(2); // any mem
5426 5426 %}
5427 5427
5428 5428 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5429 5429 instruction_count(3);
5430 5430 src1 : S3(read);
5431 5431 src2 : S3(read);
5432 5432 dst : S4(read);
5433 5433 D0 : S0(3); // big decoder only
5434 5434 FPU : S4;
5435 5435 MEM : S3(3); // any mem
5436 5436 %}
5437 5437
5438 5438 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5439 5439 instruction_count(3);
5440 5440 src1 : S4(read);
5441 5441 mem : S4(read);
5442 5442 DECODE : S0; // any decoder for FPU PUSH
5443 5443 D0 : S0(2); // big decoder only
5444 5444 FPU : S4;
5445 5445 MEM : S3(2); // any mem
5446 5446 %}
5447 5447
5448 5448 // Float load constant
5449 5449 pipe_class fpu_reg_con(regDPR dst) %{
5450 5450 instruction_count(2);
5451 5451 dst : S5(write);
5452 5452 D0 : S0; // big decoder only for the load
5453 5453 DECODE : S1; // any decoder for FPU POP
5454 5454 FPU : S4;
5455 5455 MEM : S3; // any mem
5456 5456 %}
5457 5457
5458 5458 // Float load constant
5459 5459 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5460 5460 instruction_count(3);
5461 5461 dst : S5(write);
5462 5462 src : S3(read);
5463 5463 D0 : S0; // big decoder only for the load
5464 5464 DECODE : S1(2); // any decoder for FPU POP
5465 5465 FPU : S4;
5466 5466 MEM : S3; // any mem
5467 5467 %}
5468 5468
5469 5469 // UnConditional branch
5470 5470 pipe_class pipe_jmp( label labl ) %{
5471 5471 single_instruction;
5472 5472 BR : S3;
5473 5473 %}
5474 5474
5475 5475 // Conditional branch
5476 5476 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5477 5477 single_instruction;
5478 5478 cr : S1(read);
5479 5479 BR : S3;
5480 5480 %}
5481 5481
5482 5482 // Allocation idiom
5483 5483 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5484 5484 instruction_count(1); force_serialization;
5485 5485 fixed_latency(6);
5486 5486 heap_ptr : S3(read);
5487 5487 DECODE : S0(3);
5488 5488 D0 : S2;
5489 5489 MEM : S3;
5490 5490 ALU : S3(2);
5491 5491 dst : S5(write);
5492 5492 BR : S5;
5493 5493 %}
5494 5494
5495 5495 // Generic big/slow expanded idiom
5496 5496 pipe_class pipe_slow( ) %{
5497 5497 instruction_count(10); multiple_bundles; force_serialization;
5498 5498 fixed_latency(100);
5499 5499 D0 : S0(2);
5500 5500 MEM : S3(2);
5501 5501 %}
5502 5502
5503 5503 // The real do-nothing guy
5504 5504 pipe_class empty( ) %{
5505 5505 instruction_count(0);
5506 5506 %}
5507 5507
5508 5508 // Define the class for the Nop node
5509 5509 define %{
5510 5510 MachNop = empty;
5511 5511 %}
5512 5512
5513 5513 %}
5514 5514
5515 5515 //----------INSTRUCTIONS-------------------------------------------------------
5516 5516 //
5517 5517 // match -- States which machine-independent subtree may be replaced
5518 5518 // by this instruction.
5519 5519 // ins_cost -- The estimated cost of this instruction is used by instruction
5520 5520 // selection to identify a minimum cost tree of machine
5521 5521 // instructions that matches a tree of machine-independent
5522 5522 // instructions.
5523 5523 // format -- A string providing the disassembly for this instruction.
5524 5524 // The value of an instruction's operand may be inserted
5525 5525 // by referring to it with a '$' prefix.
5526 5526 // opcode -- Three instruction opcodes may be provided. These are referred
5527 5527 // to within an encode class as $primary, $secondary, and $tertiary
5528 5528 // respectively. The primary opcode is commonly used to
5529 5529 // indicate the type of machine instruction, while secondary
5530 5530 // and tertiary are often used for prefix options or addressing
5531 5531 // modes.
5532 5532 // ins_encode -- A list of encode classes with parameters. The encode class
5533 5533 // name must have been defined in an 'enc_class' specification
5534 5534 // in the encode section of the architecture description.
5535 5535
5536 5536 //----------BSWAP-Instruction--------------------------------------------------
5537 5537 instruct bytes_reverse_int(eRegI dst) %{
5538 5538 match(Set dst (ReverseBytesI dst));
5539 5539
5540 5540 format %{ "BSWAP $dst" %}
5541 5541 opcode(0x0F, 0xC8);
5542 5542 ins_encode( OpcP, OpcSReg(dst) );
5543 5543 ins_pipe( ialu_reg );
5544 5544 %}
5545 5545
5546 5546 instruct bytes_reverse_long(eRegL dst) %{
5547 5547 match(Set dst (ReverseBytesL dst));
↓ open down ↓ |
5547 lines elided |
↑ open up ↑ |
5548 5548
5549 5549 format %{ "BSWAP $dst.lo\n\t"
5550 5550 "BSWAP $dst.hi\n\t"
5551 5551 "XCHG $dst.lo $dst.hi" %}
5552 5552
5553 5553 ins_cost(125);
5554 5554 ins_encode( bswap_long_bytes(dst) );
5555 5555 ins_pipe( ialu_reg_reg);
5556 5556 %}
5557 5557
5558 -instruct bytes_reverse_unsigned_short(eRegI dst) %{
5558 +instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{
5559 5559 match(Set dst (ReverseBytesUS dst));
5560 + effect(KILL cr);
5560 5561
5561 5562 format %{ "BSWAP $dst\n\t"
5562 5563 "SHR $dst,16\n\t" %}
5563 5564 ins_encode %{
5564 5565 __ bswapl($dst$$Register);
5565 5566 __ shrl($dst$$Register, 16);
5566 5567 %}
5567 5568 ins_pipe( ialu_reg );
5568 5569 %}
5569 5570
5570 -instruct bytes_reverse_short(eRegI dst) %{
5571 +instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{
5571 5572 match(Set dst (ReverseBytesS dst));
5573 + effect(KILL cr);
5572 5574
5573 5575 format %{ "BSWAP $dst\n\t"
5574 5576 "SAR $dst,16\n\t" %}
5575 5577 ins_encode %{
5576 5578 __ bswapl($dst$$Register);
5577 5579 __ sarl($dst$$Register, 16);
5578 5580 %}
5579 5581 ins_pipe( ialu_reg );
5580 5582 %}
5581 5583
5582 5584
5583 5585 //---------- Zeros Count Instructions ------------------------------------------
5584 5586
5585 5587 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
5586 5588 predicate(UseCountLeadingZerosInstruction);
5587 5589 match(Set dst (CountLeadingZerosI src));
5588 5590 effect(KILL cr);
5589 5591
5590 5592 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
5591 5593 ins_encode %{
5592 5594 __ lzcntl($dst$$Register, $src$$Register);
5593 5595 %}
5594 5596 ins_pipe(ialu_reg);
5595 5597 %}
5596 5598
5597 5599 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
5598 5600 predicate(!UseCountLeadingZerosInstruction);
5599 5601 match(Set dst (CountLeadingZerosI src));
5600 5602 effect(KILL cr);
5601 5603
5602 5604 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
5603 5605 "JNZ skip\n\t"
5604 5606 "MOV $dst, -1\n"
5605 5607 "skip:\n\t"
5606 5608 "NEG $dst\n\t"
5607 5609 "ADD $dst, 31" %}
5608 5610 ins_encode %{
5609 5611 Register Rdst = $dst$$Register;
5610 5612 Register Rsrc = $src$$Register;
5611 5613 Label skip;
5612 5614 __ bsrl(Rdst, Rsrc);
5613 5615 __ jccb(Assembler::notZero, skip);
5614 5616 __ movl(Rdst, -1);
5615 5617 __ bind(skip);
5616 5618 __ negl(Rdst);
5617 5619 __ addl(Rdst, BitsPerInt - 1);
5618 5620 %}
5619 5621 ins_pipe(ialu_reg);
5620 5622 %}
5621 5623
5622 5624 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
5623 5625 predicate(UseCountLeadingZerosInstruction);
5624 5626 match(Set dst (CountLeadingZerosL src));
5625 5627 effect(TEMP dst, KILL cr);
5626 5628
5627 5629 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
5628 5630 "JNC done\n\t"
5629 5631 "LZCNT $dst, $src.lo\n\t"
5630 5632 "ADD $dst, 32\n"
5631 5633 "done:" %}
5632 5634 ins_encode %{
5633 5635 Register Rdst = $dst$$Register;
5634 5636 Register Rsrc = $src$$Register;
5635 5637 Label done;
5636 5638 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5637 5639 __ jccb(Assembler::carryClear, done);
5638 5640 __ lzcntl(Rdst, Rsrc);
5639 5641 __ addl(Rdst, BitsPerInt);
5640 5642 __ bind(done);
5641 5643 %}
5642 5644 ins_pipe(ialu_reg);
5643 5645 %}
5644 5646
// Count leading zeros of a 64-bit value without LZCNT, via BSR.
// BSR yields the index of the highest set bit (ZF set if operand is zero);
// leading zeros = 63 - index, computed below as NEG + ADD 63.
instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    // Try the high word first; a hit there is at bit position index+32.
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    // High word zero: scan the low word.
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Whole value is zero: use index -1 so 63 - (-1) yields 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // 63 - index  ==  -index + 63
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
5680 5682
// Count trailing zeros of a 32-bit value using BSF.
// BSF leaves the destination undefined when the source is zero (ZF set),
// so that case is patched to 32 explicitly.
instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Label found;
    __ bsfl($dst$$Register, $src$$Register);
    __ jccb(Assembler::notZero, found);
    // Source was zero: all 32 bits are trailing zeros.
    __ movl($dst$$Register, BitsPerInt);
    __ bind(found);
  %}
  ins_pipe(ialu_reg);
%}
5699 5701
// Count trailing zeros of a 64-bit value using BSF on each half.
// Scan the low word first; if it is zero, scan the high word and add 32.
// If both halves are zero the result is 32 + 32 = 64.
instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    // Low word was zero: result is 32 + trailing zeros of the high word.
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Both words zero: 32 here plus the ADD below gives 64.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5728 5730
5729 5731
//---------- Population Count Instructions -------------------------------------

// Count set bits in a 32-bit int with POPCNT.
// POPCNT writes EFLAGS, hence the KILL cr effect (this is what the
// eFlagsReg cr operand added by this change is for).
instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
5742 5745
// Count set bits in a 32-bit int loaded directly from memory.
// As with popCountI, POPCNT clobbers EFLAGS, so cr is killed.
instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
5753 5757
// Note: Long.bitCount(long) returns an int.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Sum the bit counts of the two 32-bit halves of the long.
    Register lo_count = $dst$$Register;
    Register hi_count = $tmp$$Register;
    __ popcntl(lo_count, $src$$Register);
    __ popcntl(hi_count, HIGH_FROM_LOW($src$$Register));
    __ addl(lo_count, hi_count);
  %}
  ins_pipe(ialu_reg);
%}
5770 5774
// Note: Long.bitCount(long) returns an int.
// Count set bits in a long loaded directly from memory: POPCNT each
// 32-bit half and sum.  (Removed stale commented-out code that used the
// nonexistent $mem$$Address$$first/$$second accessors.)
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // The +4 displacement addresses the high word of the little-endian long.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
5789 5793
5790 5794
//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    // MOVSX8 already sign-extended the byte to 32 bits, so bits 31..7 of
    // dst.lo all equal the sign bit; an arithmetic shift of the copy by
    // just 7 therefore fills dst.hi with 0 or -1.
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
5825 5829
// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    // Unsigned value: zero-extend the low word, clear the high word.
    // XOR modifies EFLAGS, hence the KILL cr effect.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}
5857 5861
// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    // Fold the AndI into the load: zero-extend, clear hi, then apply the
    // 8-bit immediate mask to the low word only.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
5874 5878
// Load Short (16bit signed)
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (value << 24) >> 24 narrowing pattern as a single
// sign-extending byte load.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    // Same trick as loadB2L: bits 31..15 already hold the sign, so an
    // arithmetic shift by 15 of the copy yields 0 or -1 in dst.hi.
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
5919 5923
// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The (value << 24) >> 24 pattern narrows to a signed byte load.
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5945 5949
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // Masking a ushort with 0xFF keeps only its low byte, so a
    // zero-extending *byte* load does the AND for free.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
5994 5998
// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// (value << 24) >> 24 narrows to a sign-extending byte load.
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// (value & 0xFF) narrows to a zero-extending byte load.
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// (value << 16) >> 16 narrows to a sign-extending short load.
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// (value & 0xFFFF) narrows to a zero-extending short load.
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6056 6060
// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    // Sign-extend: copy the low word and arithmetic-shift the copy by 31,
    // leaving 0 or -1 in the high word.
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // (int & 0xFF) is just the low byte: zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // (int & 0xFFFF) is just the low short: zero-extending short load.
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 32-bit mask into Long Register
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    // General mask: load, clear hi, then AND the low word with the
    // immediate.  The result is non-negative, so hi stays zero.
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6122 6126
// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    // Zero-extend: load the 32-bit value and clear the high word.
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
6139 6143
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic case only; volatile longs use the atomic variants below.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: disp and disp+4 address the lo/hi halves of the
    // little-endian long.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}
6159 6163
// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load to a stack slot via one 64-bit SSE2 move.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into an int register pair:
// one 64-bit XMM load, then split into lo/hi with MOVD + PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);   // shift the high word down for the second MOVD
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6205 6209
// Load Range (array length)
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);   // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);   // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);   // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6239 6243
// Load Double (x87 stack, UseSSE<=1)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  // MOVSD clears the upper half of the XMM register.
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// As above but when the upper half need not be cleared
// (UseXmmLoadAndClearUpper off); printed as MOVLPD.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack, UseSSE==0)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
6303 6307
// Load Aligned Packed Byte to XMM register
instruct loadA8B(regD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
// All packed forms are a single 64-bit MOVQ; only the vector type differs.
instruct loadA2F(regD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
6363 6367
// Load Effective Address
// LEA computes the address expression in a register without touching
// memory; one instruct per supported addressing mode.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);   // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6414 6418
// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but writes EFLAGS, hence KILL cr.
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a long constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Zero long constant: XOR both halves (flag-killing, like loadConI0).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
6466 6470
// The instruction usage is guarded by predicate in operand immFPR().
// x87 path: load the float from the constant table, pop into the
// allocated FP stack register.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// 0.0f comes from FLDZ, no constant-table access needed.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// 1.0f comes from FLD1, no constant-table access needed.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE path: MOVSS from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS reg,reg zeroes the register without a memory load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
6527 6531
// The instruction usage is guarded by predicate in operand immDPR().
// x87 path: load the double from the constant table, pop into the
// allocated FP stack register.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// 0.0 comes from FLDZ, no constant-table access needed.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// 1.0 comes from FLD1, no constant-table access needed.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 path: MOVSD from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XORPD reg,reg zeroes the register without a memory load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6591 6595
6592 6596 // Load Stack Slot
// Load a 32-bit integer from a spill slot into a general register (MOV r32,m32).
6593 6597 instruct loadSSI(eRegI dst, stackSlotI src) %{
6594 6598 match(Set dst src);
6595 6599 ins_cost(125);
6596 6600
6597 6601 format %{ "MOV $dst,$src" %}
6598 6602 opcode(0x8B);
6599 6603 ins_encode( OpcP, RegMem(dst,src));
6600 6604 ins_pipe( ialu_reg_mem );
6601 6605 %}
6602 6606
// Load a 64-bit long from a spill slot as two 32-bit MOVs (lo word, then hi
// word at offset +4). Not atomic; atomicity is handled by the volatile forms.
6603 6607 instruct loadSSL(eRegL dst, stackSlotL src) %{
6604 6608 match(Set dst src);
6605 6609
6606 6610 ins_cost(200);
6607 6611 format %{ "MOV $dst,$src.lo\n\t"
6608 6612 "MOV $dst+4,$src.hi" %}
6609 6613 opcode(0x8B, 0x8B);
6610 6614 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6611 6615 ins_pipe( ialu_mem_long_reg );
6612 6616 %}
6613 6617
6614 6618 // Load Stack Slot
// Load a pointer from a spill slot into a general register.
6615 6619 instruct loadSSP(eRegP dst, stackSlotP src) %{
6616 6620 match(Set dst src);
6617 6621 ins_cost(125);
6618 6622
6619 6623 format %{ "MOV $dst,$src" %}
6620 6624 opcode(0x8B);
6621 6625 ins_encode( OpcP, RegMem(dst,src));
6622 6626 ins_pipe( ialu_reg_mem );
6623 6627 %}
6624 6628
6625 6629 // Load Stack Slot
// Load a float spill slot onto the x87 stack (FLD m32real) and pop it into
// the destination x87 register.
6626 6630 instruct loadSSF(regFPR dst, stackSlotF src) %{
6627 6631 match(Set dst src);
6628 6632 ins_cost(125);
6629 6633
6630 6634 format %{ "FLD_S $src\n\t"
6631 6635 "FSTP $dst" %}
6632 6636 opcode(0xD9); /* D9 /0, FLD m32real */
6633 6637 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6634 6638 Pop_Reg_FPR(dst) );
6635 6639 ins_pipe( fpu_reg_mem );
6636 6640 %}
6637 6641
6638 6642 // Load Stack Slot
// Load a double spill slot onto the x87 stack (FLD m64real) and pop it into
// the destination x87 register.
6639 6643 instruct loadSSD(regDPR dst, stackSlotD src) %{
6640 6644 match(Set dst src);
6641 6645 ins_cost(125);
6642 6646
6643 6647 format %{ "FLD_D $src\n\t"
6644 6648 "FSTP $dst" %}
6645 6649 opcode(0xDD); /* DD /0, FLD m64real */
6646 6650 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6647 6651 Pop_Reg_DPR(dst) );
6648 6652 ins_pipe( fpu_reg_mem );
6649 6653 %}
6650 6654
6651 6655 // Prefetch instructions.
6652 6656 // Must be safe to execute with invalid address (cannot fault).
6653 6657
// No-SSE, no-3DNow: nothing to emit — prefetch degrades to a zero-size nop.
6654 6658 instruct prefetchr0( memory mem ) %{
6655 6659 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6656 6660 match(PrefetchRead mem);
6657 6661 ins_cost(0);
6658 6662 size(0);
6659 6663 format %{ "PREFETCHR (non-SSE is empty encoding)" %}
6660 6664 ins_encode();
6661 6665 ins_pipe(empty);
6662 6666 %}
6663 6667
// 3DNow PREFETCH. NOTE(review): '&&' binds tighter than '||', so
// ReadPrefetchInstr==3 selects this form even when UseSSE>0 — presumably
// intentional (an explicit flag choice overrides SSE availability); confirm.
6664 6668 instruct prefetchr( memory mem ) %{
6665 6669 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
6666 6670 match(PrefetchRead mem);
6667 6671 ins_cost(100);
6668 6672
6669 6673 format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
6670 6674 ins_encode %{
6671 6675 __ prefetchr($mem$$Address);
6672 6676 %}
6673 6677 ins_pipe(ialu_mem);
6674 6678 %}
6675 6679
// SSE PREFETCHNTA — non-temporal hint, selected by ReadPrefetchInstr==0.
6676 6680 instruct prefetchrNTA( memory mem ) %{
6677 6681 predicate(UseSSE>=1 && ReadPrefetchInstr==0);
6678 6682 match(PrefetchRead mem);
6679 6683 ins_cost(100);
6680 6684
6681 6685 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
6682 6686 ins_encode %{
6683 6687 __ prefetchnta($mem$$Address);
6684 6688 %}
6685 6689 ins_pipe(ialu_mem);
6686 6690 %}
6687 6691
// SSE PREFETCHT0 — all cache levels, selected by ReadPrefetchInstr==1.
6688 6692 instruct prefetchrT0( memory mem ) %{
6689 6693 predicate(UseSSE>=1 && ReadPrefetchInstr==1);
6690 6694 match(PrefetchRead mem);
6691 6695 ins_cost(100);
6692 6696
6693 6697 format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
6694 6698 ins_encode %{
6695 6699 __ prefetcht0($mem$$Address);
6696 6700 %}
6697 6701 ins_pipe(ialu_mem);
6698 6702 %}
6699 6703
// SSE PREFETCHT2 — L2 and up, selected by ReadPrefetchInstr==2.
6700 6704 instruct prefetchrT2( memory mem ) %{
6701 6705 predicate(UseSSE>=1 && ReadPrefetchInstr==2);
6702 6706 match(PrefetchRead mem);
6703 6707 ins_cost(100);
6704 6708
6705 6709 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
6706 6710 ins_encode %{
6707 6711 __ prefetcht2($mem$$Address);
6708 6712 %}
6709 6713 ins_pipe(ialu_mem);
6710 6714 %}
6711 6715
// Write-prefetch variants, matched on PrefetchWrite.
// No-SSE, no-3DNow: empty encoding (prefetch is a hint, safe to drop).
6712 6716 instruct prefetchw0( memory mem ) %{
6713 6717 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6714 6718 match(PrefetchWrite mem);
6715 6719 ins_cost(0);
6716 6720 size(0);
6717 6721 format %{ "Prefetch (non-SSE is empty encoding)" %}
6718 6722 ins_encode();
6719 6723 ins_pipe(empty);
6720 6724 %}
6721 6725
// 3DNow PREFETCHW — fetches the line and marks it modified (owned).
6722 6726 instruct prefetchw( memory mem ) %{
6723 6727 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch());
6724 6728 match( PrefetchWrite mem );
6725 6729 ins_cost(100);
6726 6730
6727 6731 format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
6728 6732 ins_encode %{
6729 6733 __ prefetchw($mem$$Address);
6730 6734 %}
6731 6735 ins_pipe(ialu_mem);
6732 6736 %}
6733 6737
// With SSE available, write prefetch always uses PREFETCHNTA.
6734 6738 instruct prefetchwNTA( memory mem ) %{
6735 6739 predicate(UseSSE>=1);
6736 6740 match(PrefetchWrite mem);
6737 6741 ins_cost(100);
6738 6742
6739 6743 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
6740 6744 ins_encode %{
6741 6745 __ prefetchnta($mem$$Address);
6742 6746 %}
6743 6747 ins_pipe(ialu_mem);
6744 6748 %}
6745 6749
6746 6750 // Prefetch instructions for allocation.
6747 6751
// No SSE and not explicitly asked for PREFETCHW: empty encoding.
6748 6752 instruct prefetchAlloc0( memory mem ) %{
6749 6753 predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6750 6754 match(PrefetchAllocation mem);
6751 6755 ins_cost(0);
6752 6756 size(0);
6753 6757 format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6754 6758 ins_encode();
6755 6759 ins_pipe(empty);
6756 6760 %}
6757 6761
// AllocatePrefetchInstr==3 explicitly selects 3DNow PREFETCHW regardless of
// UseSSE (guarded at VM startup — TODO confirm flag validation elsewhere).
6758 6762 instruct prefetchAlloc( memory mem ) %{
6759 6763 predicate(AllocatePrefetchInstr==3);
6760 6764 match( PrefetchAllocation mem );
6761 6765 ins_cost(100);
6762 6766
6763 6767 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6764 6768 ins_encode %{
6765 6769 __ prefetchw($mem$$Address);
6766 6770 %}
6767 6771 ins_pipe(ialu_mem);
6768 6772 %}
6769 6773
// AllocatePrefetchInstr==0: non-temporal hint for allocation prefetch.
6770 6774 instruct prefetchAllocNTA( memory mem ) %{
6771 6775 predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6772 6776 match(PrefetchAllocation mem);
6773 6777 ins_cost(100);
6774 6778
6775 6779 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6776 6780 ins_encode %{
6777 6781 __ prefetchnta($mem$$Address);
6778 6782 %}
6779 6783 ins_pipe(ialu_mem);
6780 6784 %}
6781 6785
// AllocatePrefetchInstr==1: prefetch into all cache levels.
6782 6786 instruct prefetchAllocT0( memory mem ) %{
6783 6787 predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6784 6788 match(PrefetchAllocation mem);
6785 6789 ins_cost(100);
6786 6790
6787 6791 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6788 6792 ins_encode %{
6789 6793 __ prefetcht0($mem$$Address);
6790 6794 %}
6791 6795 ins_pipe(ialu_mem);
6792 6796 %}
6793 6797
// AllocatePrefetchInstr==2: prefetch into L2 and up.
6794 6798 instruct prefetchAllocT2( memory mem ) %{
6795 6799 predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6796 6800 match(PrefetchAllocation mem);
6797 6801 ins_cost(100);
6798 6802
6799 6803 format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6800 6804 ins_encode %{
6801 6805 __ prefetcht2($mem$$Address);
6802 6806 %}
6803 6807 ins_pipe(ialu_mem);
6804 6808 %}
6805 6809
6806 6810 //----------Store Instructions-------------------------------------------------
6807 6811
6808 6812 // Store Byte
// Uses xRegI (byte-addressable registers only: EAX/EBX/ECX/EDX on ia32).
6809 6813 instruct storeB(memory mem, xRegI src) %{
6810 6814 match(Set mem (StoreB mem src));
6811 6815
6812 6816 ins_cost(125);
6813 6817 format %{ "MOV8 $mem,$src" %}
6814 6818 opcode(0x88);
6815 6819 ins_encode( OpcP, RegMem( src, mem ) );
6816 6820 ins_pipe( ialu_mem_reg );
6817 6821 %}
6818 6822
6819 6823 // Store Char/Short
// 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
6820 6824 instruct storeC(memory mem, eRegI src) %{
6821 6825 match(Set mem (StoreC mem src));
6822 6826
6823 6827 ins_cost(125);
6824 6828 format %{ "MOV16 $mem,$src" %}
6825 6829 opcode(0x89, 0x66);
6826 6830 ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6827 6831 ins_pipe( ialu_mem_reg );
6828 6832 %}
6829 6833
6830 6834 // Store Integer
6831 6835 instruct storeI(memory mem, eRegI src) %{
6832 6836 match(Set mem (StoreI mem src));
6833 6837
6834 6838 ins_cost(125);
6835 6839 format %{ "MOV $mem,$src" %}
6836 6840 opcode(0x89);
6837 6841 ins_encode( OpcP, RegMem( src, mem ) );
6838 6842 ins_pipe( ialu_mem_reg );
6839 6843 %}
6840 6844
6841 6845 // Store Long
// Non-atomic two-MOV store; the atomic-access case is handled by the
// storeL_volatile / storeLX_* forms below.
6842 6846 instruct storeL(long_memory mem, eRegL src) %{
6843 6847 predicate(!((StoreLNode*)n)->require_atomic_access());
6844 6848 match(Set mem (StoreL mem src));
6845 6849
6846 6850 ins_cost(200);
6847 6851 format %{ "MOV $mem,$src.lo\n\t"
6848 6852 "MOV $mem+4,$src.hi" %}
6849 6853 opcode(0x89, 0x89);
6850 6854 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6851 6855 ins_pipe( ialu_mem_long_reg );
6852 6856 %}
6853 6857
6854 6858 // Store Long to Integer
// Folds the ConvL2I into the store: only the low 32 bits are written.
6855 6859 instruct storeL2I(memory mem, eRegL src) %{
6856 6860 match(Set mem (StoreI mem (ConvL2I src)));
6857 6861
6858 6862 format %{ "MOV $mem,$src.lo\t# long -> int" %}
6859 6863 ins_encode %{
6860 6864 __ movl($mem$$Address, $src$$Register);
6861 6865 %}
6862 6866 ins_pipe(ialu_mem_reg);
6863 6867 %}
6864 6868
6865 6869 // Volatile Store Long. Must be atomic, so move it into
6866 6870 // the FP TOS and then do a 64-bit FIST. Has to probe the
6867 6871 // target address before the store (for null-ptr checks)
6868 6872 // so the memory operand is used twice in the encoding.
6869 6873 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6870 6874 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6871 6875 match(Set mem (StoreL mem src));
// The CMP probe clobbers the condition codes, hence KILL cr.
6872 6876 effect( KILL cr );
6873 6877 ins_cost(400);
6874 6878 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6875 6879 "FILD $src\n\t"
6876 6880 "FISTp $mem\t # 64-bit atomic volatile long store" %}
6877 6881 opcode(0x3B);
6878 6882 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6879 6883 ins_pipe( fpu_reg_mem );
6880 6884 %}
6881 6885
// SSE2 variant: bounce the long through an XMM register so the 64-bit store
// is a single (atomic) MOVSD. Source comes from a stack slot.
6882 6886 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6883 6887 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6884 6888 match(Set mem (StoreL mem src));
6885 6889 effect( TEMP tmp, KILL cr );
6886 6890 ins_cost(380);
6887 6891 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6888 6892 "MOVSD $tmp,$src\n\t"
6889 6893 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
6890 6894 ins_encode %{
6891 6895 __ cmpl(rax, $mem$$Address);
6892 6896 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6893 6897 __ movdbl($mem$$Address, $tmp$$XMMRegister);
6894 6898 %}
6895 6899 ins_pipe( pipe_slow );
6896 6900 %}
6897 6901
// SSE2 variant with the long already in a register pair: pack lo/hi halves
// into one XMM register (MOVD x2 + PUNPCKLDQ), then store atomically.
6898 6902 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6899 6903 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6900 6904 match(Set mem (StoreL mem src));
6901 6905 effect( TEMP tmp2 , TEMP tmp, KILL cr );
6902 6906 ins_cost(360);
6903 6907 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6904 6908 "MOVD $tmp,$src.lo\n\t"
6905 6909 "MOVD $tmp2,$src.hi\n\t"
6906 6910 "PUNPCKLDQ $tmp,$tmp2\n\t"
6907 6911 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
6908 6912 ins_encode %{
6909 6913 __ cmpl(rax, $mem$$Address);
6910 6914 __ movdl($tmp$$XMMRegister, $src$$Register);
6911 6915 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6912 6916 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6913 6917 __ movdbl($mem$$Address, $tmp$$XMMRegister);
6914 6918 %}
6915 6919 ins_pipe( pipe_slow );
6916 6920 %}
6917 6921
6918 6922 // Store Pointer; for storing unknown oops and raw pointers
6919 6923 instruct storeP(memory mem, anyRegP src) %{
6920 6924 match(Set mem (StoreP mem src));
6921 6925
6922 6926 ins_cost(125);
6923 6927 format %{ "MOV $mem,$src" %}
6924 6928 opcode(0x89);
6925 6929 ins_encode( OpcP, RegMem( src, mem ) );
6926 6930 ins_pipe( ialu_mem_reg );
6927 6931 %}
6928 6932
6929 6933 // Store Integer Immediate
// MOV m32,imm32 — avoids tying up a register for the constant.
6930 6934 instruct storeImmI(memory mem, immI src) %{
6931 6935 match(Set mem (StoreI mem src));
6932 6936
6933 6937 ins_cost(150);
6934 6938 format %{ "MOV $mem,$src" %}
6935 6939 opcode(0xC7); /* C7 /0 */
6936 6940 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
6937 6941 ins_pipe( ialu_mem_imm );
6938 6942 %}
6939 6943
6940 6944 // Store Short/Char Immediate
// Gated on UseStoreImmI16: 16-bit immediate stores cause length-changing-
// prefix stalls on some CPUs, so the flag can disable this form.
6941 6945 instruct storeImmI16(memory mem, immI16 src) %{
6942 6946 predicate(UseStoreImmI16);
6943 6947 match(Set mem (StoreC mem src));
6944 6948
6945 6949 ins_cost(150);
6946 6950 format %{ "MOV16 $mem,$src" %}
6947 6951 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6948 6952 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
6949 6953 ins_pipe( ialu_mem_imm );
6950 6954 %}
6951 6955
6952 6956 // Store Pointer Immediate; null pointers or constant oops that do not
6953 6957 // need card-mark barriers.
6954 6958 instruct storeImmP(memory mem, immP src) %{
6955 6959 match(Set mem (StoreP mem src));
6956 6960
6957 6961 ins_cost(150);
6958 6962 format %{ "MOV $mem,$src" %}
6959 6963 opcode(0xC7); /* C7 /0 */
6960 6964 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
6961 6965 ins_pipe( ialu_mem_imm );
6962 6966 %}
6963 6967
6964 6968 // Store Byte Immediate
6965 6969 instruct storeImmB(memory mem, immI8 src) %{
6966 6970 match(Set mem (StoreB mem src));
6967 6971
6968 6972 ins_cost(150);
6969 6973 format %{ "MOV8 $mem,$src" %}
6970 6974 opcode(0xC6); /* C6 /0 */
6971 6975 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
6972 6976 ins_pipe( ialu_mem_imm );
6973 6977 %}
6974 6978
6975 6979 // Store Aligned Packed Byte XMM register to memory
// All three packed stores below emit the same 64-bit MOVQ; they differ only
// in the ideal vector node they match (8 bytes / 4 chars / 2 ints).
6976 6980 instruct storeA8B(memory mem, regD src) %{
6977 6981 predicate(UseSSE>=1);
6978 6982 match(Set mem (Store8B mem src));
6979 6983 ins_cost(145);
6980 6984 format %{ "MOVQ $mem,$src\t! packed8B" %}
6981 6985 ins_encode %{
6982 6986 __ movq($mem$$Address, $src$$XMMRegister);
6983 6987 %}
6984 6988 ins_pipe( pipe_slow );
6985 6989 %}
6986 6990
6987 6991 // Store Aligned Packed Char/Short XMM register to memory
6988 6992 instruct storeA4C(memory mem, regD src) %{
6989 6993 predicate(UseSSE>=1);
6990 6994 match(Set mem (Store4C mem src));
6991 6995 ins_cost(145);
6992 6996 format %{ "MOVQ $mem,$src\t! packed4C" %}
6993 6997 ins_encode %{
6994 6998 __ movq($mem$$Address, $src$$XMMRegister);
6995 6999 %}
6996 7000 ins_pipe( pipe_slow );
6997 7001 %}
6998 7002
6999 7003 // Store Aligned Packed Integer XMM register to memory
7000 7004 instruct storeA2I(memory mem, regD src) %{
7001 7005 predicate(UseSSE>=1);
7002 7006 match(Set mem (Store2I mem src));
7003 7007 ins_cost(145);
7004 7008 format %{ "MOVQ $mem,$src\t! packed2I" %}
7005 7009 ins_encode %{
7006 7010 __ movq($mem$$Address, $src$$XMMRegister);
7007 7011 %}
7008 7012 ins_pipe( pipe_slow );
7009 7013 %}
7010 7014
7011 7015 // Store CMS card-mark Immediate
// Byte store of an 8-bit immediate used for GC card-table dirtying.
7012 7016 instruct storeImmCM(memory mem, immI8 src) %{
7013 7017 match(Set mem (StoreCM mem src));
7014 7018
7015 7019 ins_cost(150);
7016 7020 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7017 7021 opcode(0xC6); /* C6 /0 */
7018 7022 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7019 7023 ins_pipe( ialu_mem_imm );
7020 7024 %}
7021 7025
7022 7026 // Store Double
// x87 store of the top-of-stack double (regDPR1 pins src to ST(0)).
7023 7027 instruct storeDPR( memory mem, regDPR1 src) %{
7024 7028 predicate(UseSSE<=1);
7025 7029 match(Set mem (StoreD mem src));
7026 7030
7027 7031 ins_cost(100);
7028 7032 format %{ "FST_D $mem,$src" %}
7029 7033 opcode(0xDD); /* DD /2 */
7030 7034 ins_encode( enc_FPR_store(mem,src) );
7031 7035 ins_pipe( fpu_mem_reg );
7032 7036 %}
7033 7037
7034 7038 // Store double does rounding on x86
// FST_D narrows the 80-bit x87 value to 64 bits, so the RoundDouble is free.
7035 7039 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
7036 7040 predicate(UseSSE<=1);
7037 7041 match(Set mem (StoreD mem (RoundDouble src)));
7038 7042
7039 7043 ins_cost(100);
7040 7044 format %{ "FST_D $mem,$src\t# round" %}
7041 7045 opcode(0xDD); /* DD /2 */
7042 7046 ins_encode( enc_FPR_store(mem,src) );
7043 7047 ins_pipe( fpu_mem_reg );
7044 7048 %}
7045 7049
7046 7050 // Store XMM register to memory (double-precision floating points)
7047 7051 // MOVSD instruction
7048 7052 instruct storeD(memory mem, regD src) %{
7049 7053 predicate(UseSSE>=2);
7050 7054 match(Set mem (StoreD mem src));
7051 7055 ins_cost(95);
7052 7056 format %{ "MOVSD $mem,$src" %}
7053 7057 ins_encode %{
7054 7058 __ movdbl($mem$$Address, $src$$XMMRegister);
7055 7059 %}
7056 7060 ins_pipe( pipe_slow );
7057 7061 %}
7058 7062
7059 7063 // Store XMM register to memory (single-precision floating point)
7060 7064 // MOVSS instruction
7061 7065 instruct storeF(memory mem, regF src) %{
7062 7066 predicate(UseSSE>=1);
7063 7067 match(Set mem (StoreF mem src));
7064 7068 ins_cost(95);
7065 7069 format %{ "MOVSS $mem,$src" %}
7066 7070 ins_encode %{
7067 7071 __ movflt($mem$$Address, $src$$XMMRegister);
7068 7072 %}
7069 7073 ins_pipe( pipe_slow );
7070 7074 %}
7071 7075
7072 7076 // Store Aligned Packed Single Float XMM register to memory
7073 7077 instruct storeA2F(memory mem, regD src) %{
7074 7078 predicate(UseSSE>=1);
7075 7079 match(Set mem (Store2F mem src));
7076 7080 ins_cost(145);
7077 7081 format %{ "MOVQ $mem,$src\t! packed2F" %}
7078 7082 ins_encode %{
7079 7083 __ movq($mem$$Address, $src$$XMMRegister);
7080 7084 %}
7081 7085 ins_pipe( pipe_slow );
7082 7086 %}
7083 7087
7084 7088 // Store Float
// x87 store of the top-of-stack float (regFPR1 pins src to ST(0)).
7085 7089 instruct storeFPR( memory mem, regFPR1 src) %{
7086 7090 predicate(UseSSE==0);
7087 7091 match(Set mem (StoreF mem src));
7088 7092
7089 7093 ins_cost(100);
7090 7094 format %{ "FST_S $mem,$src" %}
7091 7095 opcode(0xD9); /* D9 /2 */
7092 7096 ins_encode( enc_FPR_store(mem,src) );
7093 7097 ins_pipe( fpu_mem_reg );
7094 7098 %}
7095 7099
7096 7100 // Store Float does rounding on x86
// FST_S narrows to 32 bits, so the explicit RoundFloat is folded away.
7097 7101 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
7098 7102 predicate(UseSSE==0);
7099 7103 match(Set mem (StoreF mem (RoundFloat src)));
7100 7104
7101 7105 ins_cost(100);
7102 7106 format %{ "FST_S $mem,$src\t# round" %}
7103 7107 opcode(0xD9); /* D9 /2 */
7104 7108 ins_encode( enc_FPR_store(mem,src) );
7105 7109 ins_pipe( fpu_mem_reg );
7106 7110 %}
7107 7111
7108 7112 // Store Float does rounding on x86
// Folds ConvD2F into the store: FST_S narrows the double to float directly.
7109 7113 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
7110 7114 predicate(UseSSE<=1);
7111 7115 match(Set mem (StoreF mem (ConvD2F src)));
7112 7116
7113 7117 ins_cost(100);
7114 7118 format %{ "FST_S $mem,$src\t# D-round" %}
7115 7119 opcode(0xD9); /* D9 /2 */
7116 7120 ins_encode( enc_FPR_store(mem,src) );
7117 7121 ins_pipe( fpu_mem_reg );
7118 7122 %}
7119 7123
7120 7124 // Store immediate Float value (it is faster than store from FPU register)
7121 7125 // The instruction usage is guarded by predicate in operand immFPR().
// Stores the raw 32-bit float image with an integer MOV m32,imm32.
7122 7126 instruct storeFPR_imm( memory mem, immFPR src) %{
7123 7127 match(Set mem (StoreF mem src));
7124 7128
7125 7129 ins_cost(50);
7126 7130 format %{ "MOV $mem,$src\t# store float" %}
7127 7131 opcode(0xC7); /* C7 /0 */
7128 7132 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
7129 7133 ins_pipe( ialu_mem_imm );
7130 7134 %}
7131 7135
7132 7136 // Store immediate Float value (it is faster than store from XMM register)
7133 7137 // The instruction usage is guarded by predicate in operand immF().
7134 7138 instruct storeF_imm( memory mem, immF src) %{
7135 7139 match(Set mem (StoreF mem src));
7136 7140
7137 7141 ins_cost(50);
7138 7142 format %{ "MOV $mem,$src\t# store float" %}
7139 7143 opcode(0xC7); /* C7 /0 */
7140 7144 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7141 7145 ins_pipe( ialu_mem_imm );
7142 7146 %}
7143 7147
7144 7148 // Store Integer to stack slot
7145 7149 instruct storeSSI(stackSlotI dst, eRegI src) %{
7146 7150 match(Set dst src);
7147 7151
7148 7152 ins_cost(100);
7149 7153 format %{ "MOV $dst,$src" %}
7150 7154 opcode(0x89);
7151 7155 ins_encode( OpcPRegSS( dst, src ) );
7152 7156 ins_pipe( ialu_mem_reg );
7153 7157 %}
7154 7158
7155 7159 // Store Integer to stack slot
// (Pointer variant — same MOV encoding as the integer spill above.)
7156 7160 instruct storeSSP(stackSlotP dst, eRegP src) %{
7157 7161 match(Set dst src);
7158 7162
7159 7163 ins_cost(100);
7160 7164 format %{ "MOV $dst,$src" %}
7161 7165 opcode(0x89);
7162 7166 ins_encode( OpcPRegSS( dst, src ) );
7163 7167 ins_pipe( ialu_mem_reg );
7164 7168 %}
7165 7169
7166 7170 // Store Long to stack slot
// Two 32-bit MOVs (lo then hi at +4); spill slots need no atomicity.
7167 7171 instruct storeSSL(stackSlotL dst, eRegL src) %{
7168 7172 match(Set dst src);
7169 7173
7170 7174 ins_cost(200);
7171 7175 format %{ "MOV $dst,$src.lo\n\t"
7172 7176 "MOV $dst+4,$src.hi" %}
7173 7177 opcode(0x89, 0x89);
7174 7178 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7175 7179 ins_pipe( ialu_mem_long_reg );
7176 7180 %}
7177 7181
7178 7182 //----------MemBar Instructions-----------------------------------------------
7179 7183 // Memory barrier flavors
7180 7184
// On x86 (TSO) loads are not reordered with other loads, so acquire needs
// no instruction — only the compiler must not reorder across it.
7181 7185 instruct membar_acquire() %{
7182 7186 match(MemBarAcquire);
7183 7187 ins_cost(400);
7184 7188
7185 7189 size(0);
7186 7190 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7187 7191 ins_encode();
7188 7192 ins_pipe(empty);
7189 7193 %}
7190 7194
7191 7195 instruct membar_acquire_lock() %{
7192 7196 match(MemBarAcquireLock);
7193 7197 ins_cost(0);
7194 7198
7195 7199 size(0);
7196 7200 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7197 7201 ins_encode( );
7198 7202 ins_pipe(empty);
7199 7203 %}
7200 7204
// Stores are not reordered with other stores on x86, so release is also
// compiler-only.
7201 7205 instruct membar_release() %{
7202 7206 match(MemBarRelease);
7203 7207 ins_cost(400);
7204 7208
7205 7209 size(0);
7206 7210 format %{ "MEMBAR-release ! (empty encoding)" %}
7207 7211 ins_encode( );
7208 7212 ins_pipe(empty);
7209 7213 %}
7210 7214
7211 7215 instruct membar_release_lock() %{
7212 7216 match(MemBarReleaseLock);
7213 7217 ins_cost(0);
7214 7218
7215 7219 size(0);
7216 7220 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7217 7221 ins_encode( );
7218 7222 ins_pipe(empty);
7219 7223 %}
7220 7224
// StoreLoad is the one ordering x86 does not guarantee; implemented as a
// locked ADD to the stack (cheaper than MFENCE on many CPUs). Clobbers flags.
7221 7225 instruct membar_volatile(eFlagsReg cr) %{
7222 7226 match(MemBarVolatile);
7223 7227 effect(KILL cr);
7224 7228 ins_cost(400);
7225 7229
7226 7230 format %{
7227 7231 $$template
7228 7232 if (os::is_MP()) {
7229 7233 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7230 7234 } else {
7231 7235 $$emit$$"MEMBAR-volatile ! (empty encoding)"
7232 7236 }
7233 7237 %}
7234 7238 ins_encode %{
7235 7239 __ membar(Assembler::StoreLoad);
7236 7240 %}
7237 7241 ins_pipe(pipe_slow);
7238 7242 %}
7239 7243
// Elided when a preceding instruction already provides the StoreLoad fence.
7240 7244 instruct unnecessary_membar_volatile() %{
7241 7245 match(MemBarVolatile);
7242 7246 predicate(Matcher::post_store_load_barrier(n));
7243 7247 ins_cost(0);
7244 7248
7245 7249 size(0);
7246 7250 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7247 7251 ins_encode( );
7248 7252 ins_pipe(empty);
7249 7253 %}
7250 7254
7251 7255 instruct membar_storestore() %{
7252 7256 match(MemBarStoreStore);
7253 7257 ins_cost(0);
7254 7258
7255 7259 size(0);
7256 7260 format %{ "MEMBAR-storestore (empty encoding)" %}
7257 7261 ins_encode( );
7258 7262 ins_pipe(empty);
7259 7263 %}
7260 7264
7261 7265 //----------Move Instructions--------------------------------------------------
// CastX2P: both operands are constrained to EAX, so the reinterpretation is
// a pure register-allocation identity — no code is emitted.
7262 7266 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7263 7267 match(Set dst (CastX2P src));
7264 7268 format %{ "# X2P $dst, $src" %}
7265 7269 ins_encode( /*empty encoding*/ );
7266 7270 ins_cost(0);
7267 7271 ins_pipe(empty);
7268 7272 %}
7269 7273
// CastP2X: plain register copy; dst/src may differ, so a MOV is emitted.
7270 7274 instruct castP2X(eRegI dst, eRegP src ) %{
7271 7275 match(Set dst (CastP2X src));
7272 7276 ins_cost(50);
7273 7277 format %{ "MOV $dst, $src\t# CastP2X" %}
7274 7278 ins_encode( enc_Copy( dst, src) );
7275 7279 ins_pipe( ialu_reg_reg );
7276 7280 %}
7277 7281
7278 7282 //----------Conditional Move---------------------------------------------------
7279 7283 // Conditional move
// Pre-P6 fallback: emulate CMOV with a short branch around a MOV. The
// branch condition is the inverse of the CMOV condition (cmpcode^1).
7280 7284 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
7281 7285 predicate(!VM_Version::supports_cmov() );
7282 7286 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7283 7287 ins_cost(200);
7284 7288 format %{ "J$cop,us skip\t# signed cmove\n\t"
7285 7289 "MOV $dst,$src\n"
7286 7290 "skip:" %}
7287 7291 ins_encode %{
7288 7292 Label Lskip;
7289 7293 // Invert sense of branch from sense of CMOV
7290 7294 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7291 7295 __ movl($dst$$Register, $src$$Register);
7292 7296 __ bind(Lskip);
7293 7297 %}
7294 7298 ins_pipe( pipe_cmov_reg );
7295 7299 %}
7296 7300
// Unsigned-compare flavor of the branch-emulated cmove above.
7297 7301 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
7298 7302 predicate(!VM_Version::supports_cmov() );
7299 7303 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7300 7304 ins_cost(200);
7301 7305 format %{ "J$cop,us skip\t# unsigned cmove\n\t"
7302 7306 "MOV $dst,$src\n"
7303 7307 "skip:" %}
7304 7308 ins_encode %{
7305 7309 Label Lskip;
7306 7310 // Invert sense of branch from sense of CMOV
7307 7311 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7308 7312 __ movl($dst$$Register, $src$$Register);
7309 7313 __ bind(Lskip);
7310 7314 %}
7311 7315 ins_pipe( pipe_cmov_reg );
7312 7316 %}
7313 7317
// True CMOVcc (0F 4x) for CPUs that support it — signed conditions.
7314 7318 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7315 7319 predicate(VM_Version::supports_cmov() );
7316 7320 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7317 7321 ins_cost(200);
7318 7322 format %{ "CMOV$cop $dst,$src" %}
7319 7323 opcode(0x0F,0x40);
7320 7324 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7321 7325 ins_pipe( pipe_cmov_reg );
7322 7326 %}
7323 7327
// CMOVcc — unsigned conditions.
7324 7328 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7325 7329 predicate(VM_Version::supports_cmov() );
7326 7330 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7327 7331 ins_cost(200);
7328 7332 format %{ "CMOV$cop $dst,$src" %}
7329 7333 opcode(0x0F,0x40);
7330 7334 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7331 7335 ins_pipe( pipe_cmov_reg );
7332 7336 %}
7333 7337
// Carry-flag-only unsigned compare: expands to the plain unsigned cmove.
7334 7338 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7335 7339 predicate(VM_Version::supports_cmov() );
7336 7340 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7337 7341 ins_cost(200);
7338 7342 expand %{
7339 7343 cmovI_regU(cop, cr, dst, src);
7340 7344 %}
7341 7345 %}
7342 7346
7343 7347 // Conditional move
// Memory-source CMOV: folds the LoadI into the conditional move.
7344 7348 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7345 7349 predicate(VM_Version::supports_cmov() );
7346 7350 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7347 7351 ins_cost(250);
7348 7352 format %{ "CMOV$cop $dst,$src" %}
7349 7353 opcode(0x0F,0x40);
7350 7354 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7351 7355 ins_pipe( pipe_cmov_mem );
7352 7356 %}
7353 7357
7354 7358 // Conditional move
7355 7359 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7356 7360 predicate(VM_Version::supports_cmov() );
7357 7361 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7358 7362 ins_cost(250);
7359 7363 format %{ "CMOV$cop $dst,$src" %}
7360 7364 opcode(0x0F,0x40);
7361 7365 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7362 7366 ins_pipe( pipe_cmov_mem );
7363 7367 %}
7364 7368
7365 7369 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7366 7370 predicate(VM_Version::supports_cmov() );
7367 7371 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7368 7372 ins_cost(250);
7369 7373 expand %{
7370 7374 cmovI_memU(cop, cr, dst, src);
7371 7375 %}
7372 7376 %}
7373 7377
7374 7378 // Conditional move
// Pointer CMOVcc for P6+ CPUs — signed conditions.
7375 7379 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7376 7380 predicate(VM_Version::supports_cmov() );
7377 7381 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7378 7382 ins_cost(200);
7379 7383 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7380 7384 opcode(0x0F,0x40);
7381 7385 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7382 7386 ins_pipe( pipe_cmov_reg );
7383 7387 %}
7384 7388
7385 7389 // Conditional move (non-P6 version)
7386 7390 // Note: a CMoveP is generated for stubs and native wrappers
7387 7391 // regardless of whether we are on a P6, so we
7388 7392 // emulate a cmov here
7389 7393 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7390 7394 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7391 7395 ins_cost(300);
7392 7396 format %{ "Jn$cop skip\n\t"
7393 7397 "MOV $dst,$src\t# pointer\n"
7394 7398 "skip:" %}
7395 7399 opcode(0x8b);
7396 7400 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7397 7401 ins_pipe( pipe_cmov_reg );
7398 7402 %}
7399 7403
7400 7404 // Conditional move
// Pointer CMOVcc — unsigned conditions.
7401 7405 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7402 7406 predicate(VM_Version::supports_cmov() );
7403 7407 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7404 7408 ins_cost(200);
7405 7409 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7406 7410 opcode(0x0F,0x40);
7407 7411 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7408 7412 ins_pipe( pipe_cmov_reg );
7409 7413 %}
7410 7414
// Carry-flag-only unsigned compare: expands to the plain unsigned form.
7411 7415 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7412 7416 predicate(VM_Version::supports_cmov() );
7413 7417 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7414 7418 ins_cost(200);
7415 7419 expand %{
7416 7420 cmovP_regU(cop, cr, dst, src);
7417 7421 %}
7418 7422 %}
7419 7423
7420 7424 // DISABLED: Requires the ADLC to emit a bottom_type call that
7421 7425 // correctly meets the two pointer arguments; one is an incoming
7422 7426 // register but the other is a memory operand. ALSO appears to
7423 7427 // be buggy with implicit null checks.
7424 7428 //
7425 7429 //// Conditional move
7426 7430 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
7427 7431 // predicate(VM_Version::supports_cmov() );
7428 7432 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7429 7433 // ins_cost(250);
7430 7434 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7431 7435 // opcode(0x0F,0x40);
7432 7436 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7433 7437 // ins_pipe( pipe_cmov_mem );
7434 7438 //%}
7435 7439 //
7436 7440 //// Conditional move
7437 7441 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7438 7442 // predicate(VM_Version::supports_cmov() );
7439 7443 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7440 7444 // ins_cost(250);
7441 7445 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
7442 7446 // opcode(0x0F,0x40);
7443 7447 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7444 7448 // ins_pipe( pipe_cmov_mem );
7445 7449 //%}
7446 7450
7447 7451 // Conditional move
// x87 FCMOVcc (DA/DB opcodes) requires the destination to be ST(0)
// (regDPR1/regFPR1) and only supports unsigned flag conditions.
7448 7452 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
7449 7453 predicate(UseSSE<=1);
7450 7454 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7451 7455 ins_cost(200);
7452 7456 format %{ "FCMOV$cop $dst,$src\t# double" %}
7453 7457 opcode(0xDA);
7454 7458 ins_encode( enc_cmov_dpr(cop,src) );
7455 7459 ins_pipe( pipe_cmovDPR_reg );
7456 7460 %}
7457 7461
7458 7462 // Conditional move
// Float flavor — same FCMOV encoding; x87 registers hold both widths.
7459 7463 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7460 7464 predicate(UseSSE==0);
7461 7465 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7462 7466 ins_cost(200);
7463 7467 format %{ "FCMOV$cop $dst,$src\t# float" %}
7464 7468 opcode(0xDA);
7465 7469 ins_encode( enc_cmov_dpr(cop,src) );
7466 7470 ins_pipe( pipe_cmovDPR_reg );
7467 7471 %}
7468 7472
7469 7473 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed conditions, branch around an x87 register-to-register copy.
7470 7474 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7471 7475 predicate(UseSSE<=1);
7472 7476 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7473 7477 ins_cost(200);
7474 7478 format %{ "Jn$cop skip\n\t"
7475 7479 "MOV $dst,$src\t# double\n"
7476 7480 "skip:" %}
7477 7481 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7478 7482 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7479 7483 ins_pipe( pipe_cmovDPR_reg );
7480 7484 %}
7481 7485
7482 7486 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7483 7487 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7484 7488 predicate(UseSSE==0);
7485 7489 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7486 7490 ins_cost(200);
7487 7491 format %{ "Jn$cop skip\n\t"
7488 7492 "MOV $dst,$src\t# float\n"
7489 7493 "skip:" %}
7490 7494 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7491 7495 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7492 7496 ins_pipe( pipe_cmovDPR_reg );
7493 7497 %}
7494 7498
// No CMOVE with SSE/SSE2: emulate CMoveF (signed compare) by jumping over a
// MOVSS.  The branch sense is the CMOV condition inverted (cmpcode ^ 1).
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
7512 7516
// No CMOVE with SSE/SSE2: emulate CMoveD (signed compare) by jumping over a
// MOVSD.  The branch sense is the CMOV condition inverted (cmpcode ^ 1).
// Fix: format string previously labeled the copied value "# float" although
// this instruct moves a double (CMoveD / MOVSD / movdbl); corrected to
// "# double" so -XX:+PrintOptoAssembly output is not misleading.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
7530 7534
// unsigned version: same branch-around-MOVSS emulation for an unsigned
// (eFlagsRegU) compare result.
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only unsigned compare: delegates to the full unsigned version above.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
7557 7561
// unsigned version: branch-around-MOVSD emulation for an unsigned
// (eFlagsRegU) compare result.
// Fix: format string previously said "# float" for a double move
// (CMoveD / MOVSD / movdbl); corrected to "# double" so
// -XX:+PrintOptoAssembly output is not misleading.
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
7575 7579
// CF-only unsigned compare: delegates to the full unsigned version above.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}
7584 7588
// Conditional move of a long: two 32-bit CMOVs, low half then high half,
// under the same (signed) condition.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-condition variant of cmovL_reg.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// CF-only unsigned compare: delegates to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7615 7619
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add 32-bit immediate; OpcSErm/Con8or32 pick the sign-extended 8-bit
// immediate form when the constant fits, else the full 32-bit form.
instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1 via the one-byte INC r32 encoding (0x40+reg), gated on UseIncDec.
instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; does not touch flags, so no KILL cr.
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1 via the one-byte DEC r32 encoding (0x48+reg), gated on UseIncDec.
instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int register.
instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += memory operand (ADD r32, r/m32).
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register, matched as a load-add-store chain (ADD r/m32, r32).
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1 via INC r/m32 (FF /0).
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1 via DEC r/m32 (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
7761 7765
7762 7766
// Compiler bookkeeping nodes: all three emit no machine code (empty
// encodings); they exist only so the matcher/register allocator see them.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
7786 7790
7787 7791
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// Uses the x87 FILD/FISTP pair to get a single atomic 64-bit memory access.
instruct loadLLocked(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 form: one 64-bit MOVSD is atomic; bounce through an XMM temp to a
// stack slot.
instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 form targeting a GPR pair: MOVD the low word out, shift the XMM temp
// right 32 bits, MOVD the high word out.
instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7842 7846
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
// NOTE(review): no lock_prefix conditionality here -- the LOCK byte is
// always emitted by the lock_prefix encoding; see its definition.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
7887 7891
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B; result register is set from ZF afterwards.
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via CMPXCHG; oldval is implicitly EAX per the ISA.
instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit int CAS via CMPXCHG.
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7926 7930
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate; 81 /5 with 8- or 32-bit immediate selected by OpcSErm.
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register -= memory operand (SUB r32, r/m32).
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory -= register, matched as a load-sub-store chain (SUB r/m32, r32).
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst matched as a two-byte NEG (F7 /3).
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
7995 7999
7996 8000
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only (low half of EDX:EAX); exists as a
// feeder for the mulI_imm_high/mulI_imm_RShift_high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree (_kids) to require that the long
// constant actually fits in 32 bits.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (general case: shift count 32..63 needs an extra SAR on EDX).
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
8091 8095
// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned widening multiply: both operands masked to 32 bits, so one MUL
// produces the full 64-bit product in EDX:EAX.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8116 8120
// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8207 8211
// Integer DIV with Register
// Special-cases dividend == 0x80000000 (min_jint) so that min_jint / -1,
// which would fault in IDIV, instead yields min_jint with remainder 0.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Falls back to a runtime call; args pushed right-to-left, caller pops 16 bytes.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// NOTE(review): unlike divI_eReg this has no min_jint/-1 guard; presumably
// cdq_enc contains the special-case sequence -- confirm against its definition.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Runtime-call fallback, same calling sequence as divL_eReg.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8289 8293
8290 8294 // Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit immediate, in place on EDX:EAX. The assert
// guarantees the divisor is not 0, -1 or min_jint. Up to two unsigned
// 32-bit DIVs by |imm| are used (high half first, then low half with the
// intermediate remainder in EDX); a negative dividend is negated before
// and after (lneg), and a negative immediate is fixed up by the final
// lneg guarded by (con < 0).
8291 8295 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8292 8296 match(Set dst (DivL dst imm));
8293 8297 effect( TEMP tmp, TEMP tmp2, KILL cr );
8294 8298 ins_cost(1000);
8295 8299 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8296 8300 "XOR $tmp2,$tmp2\n\t"
8297 8301 "CMP $tmp,EDX\n\t"
8298 8302 "JA,s fast\n\t"
8299 8303 "MOV $tmp2,EAX\n\t"
8300 8304 "MOV EAX,EDX\n\t"
8301 8305 "MOV EDX,0\n\t"
8302 8306 "JLE,s pos\n\t"
8303 8307 "LNEG EAX : $tmp2\n\t"
8304 8308 "DIV $tmp # unsigned division\n\t"
8305 8309 "XCHG EAX,$tmp2\n\t"
8306 8310 "DIV $tmp\n\t"
8307 8311 "LNEG $tmp2 : EAX\n\t"
8308 8312 "JMP,s done\n"
8309 8313 "pos:\n\t"
8310 8314 "DIV $tmp\n\t"
8311 8315 "XCHG EAX,$tmp2\n"
8312 8316 "fast:\n\t"
8313 8317 "DIV $tmp\n"
8314 8318 "done:\n\t"
8315 8319 "MOV EDX,$tmp2\n\t"
8316 8320 "NEG EDX:EAX # if $imm < 0" %}
8317 8321 ins_encode %{
8318 8322 int con = (int)$imm$$constant;
8319 8323 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8320 8324 int pcon = (con > 0) ? con : -con;
8321 8325 Label Lfast, Lpos, Ldone;
8322 8326 
8323 8327 __ movl($tmp$$Register, pcon);
8324 8328 __ xorl($tmp2$$Register,$tmp2$$Register);
8325 8329 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8326 8330 __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8327 8331 
8328 8332 __ movl($tmp2$$Register, $dst$$Register); // save
8329 8333 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8330 8334 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8331 8335 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8332 8336 
8333 8337 // Negative dividend.
8334 8338 // convert value to positive to use unsigned division
8335 8339 __ lneg($dst$$Register, $tmp2$$Register);
8336 8340 __ divl($tmp$$Register);
8337 8341 __ xchgl($dst$$Register, $tmp2$$Register);
8338 8342 __ divl($tmp$$Register);
8339 8343 // revert result back to negative
8340 8344 __ lneg($tmp2$$Register, $dst$$Register);
8341 8345 __ jmpb(Ldone);
8342 8346 
8343 8347 __ bind(Lpos);
8344 8348 __ divl($tmp$$Register); // Use unsigned division
8345 8349 __ xchgl($dst$$Register, $tmp2$$Register);
8346 8350 // Fallthrough for final divide, tmp2 has 32 bit hi result
8347 8351 
8348 8352 __ bind(Lfast);
8349 8353 // fast path: src is positive
8350 8354 __ divl($tmp$$Register); // Use unsigned division
8351 8355 
8352 8356 __ bind(Ldone);
8353 8357 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8354 8358 if (con < 0) {
8355 8359 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8356 8360 }
8357 8361 %}
8358 8362 ins_pipe( pipe_slow );
8359 8363 %}
8360 8364
8361 8365 // Remainder Register Long (remainder fit into 32 bits)
// Long remainder by a 32-bit immediate — the remainder always fits in 32
// bits since |rem| < |imm|. Same double-DIV scheme as divL_eReg_imm32,
// but keeping the remainder (EDX) instead of the quotient. The result is
// sign-extended at Ldone (MOV + SAR 31); the remainder carries the sign
// of the dividend, the divisor's sign is irrelevant for ModL.
8362 8366 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8363 8367 match(Set dst (ModL dst imm));
8364 8368 effect( TEMP tmp, TEMP tmp2, KILL cr );
8365 8369 ins_cost(1000);
8366 8370 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8367 8371 "CMP $tmp,EDX\n\t"
8368 8372 "JA,s fast\n\t"
8369 8373 "MOV $tmp2,EAX\n\t"
8370 8374 "MOV EAX,EDX\n\t"
8371 8375 "MOV EDX,0\n\t"
8372 8376 "JLE,s pos\n\t"
8373 8377 "LNEG EAX : $tmp2\n\t"
8374 8378 "DIV $tmp # unsigned division\n\t"
8375 8379 "MOV EAX,$tmp2\n\t"
8376 8380 "DIV $tmp\n\t"
8377 8381 "NEG EDX\n\t"
8378 8382 "JMP,s done\n"
8379 8383 "pos:\n\t"
8380 8384 "DIV $tmp\n\t"
8381 8385 "MOV EAX,$tmp2\n"
8382 8386 "fast:\n\t"
8383 8387 "DIV $tmp\n"
8384 8388 "done:\n\t"
8385 8389 "MOV EAX,EDX\n\t"
8386 8390 "SAR EDX,31\n\t" %}
8387 8391 ins_encode %{
8388 8392 int con = (int)$imm$$constant;
8389 8393 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8390 8394 int pcon = (con > 0) ? con : -con;
8391 8395 Label Lfast, Lpos, Ldone;
8392 8396 
8393 8397 __ movl($tmp$$Register, pcon);
8394 8398 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8395 8399 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8396 8400 
8397 8401 __ movl($tmp2$$Register, $dst$$Register); // save
8398 8402 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8399 8403 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8400 8404 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8401 8405 
8402 8406 // Negative dividend.
8403 8407 // convert value to positive to use unsigned division
8404 8408 __ lneg($dst$$Register, $tmp2$$Register);
8405 8409 __ divl($tmp$$Register);
8406 8410 __ movl($dst$$Register, $tmp2$$Register);
8407 8411 __ divl($tmp$$Register);
8408 8412 // revert remainder back to negative
8409 8413 __ negl(HIGH_FROM_LOW($dst$$Register));
8410 8414 __ jmpb(Ldone);
8411 8415 
8412 8416 __ bind(Lpos);
8413 8417 __ divl($tmp$$Register);
8414 8418 __ movl($dst$$Register, $tmp2$$Register);
8415 8419 
8416 8420 __ bind(Lfast);
8417 8421 // fast path: src is positive
8418 8422 __ divl($tmp$$Register);
8419 8423 
8420 8424 __ bind(Ldone);
8421 8425 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8422 8426 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8423 8427 
8424 8428 %}
8425 8429 ins_pipe( pipe_slow );
8426 8430 %}
8427 8431
8428 8432 // Integer Shift Instructions
8429 8433 // Shift Left by one
// Integer shift-left family. Encodings: D1 /4 (shift by 1, 2 bytes),
// C1 /4 ib (shift by imm8, 3 bytes), D3 /4 (shift by CL). The variable
// form requires the count in ECX (eCXRegI), per x86 shift semantics.
8430 8434 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8431 8435 match(Set dst (LShiftI dst shift));
8432 8436 effect(KILL cr);
8433 8437 
8434 8438 size(2);
8435 8439 format %{ "SHL $dst,$shift" %}
8436 8440 opcode(0xD1, 0x4); /* D1 /4 */
8437 8441 ins_encode( OpcP, RegOpc( dst ) );
8438 8442 ins_pipe( ialu_reg );
8439 8443 %}
8440 8444 
8441 8445 // Shift Left by 8-bit immediate
8442 8446 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8443 8447 match(Set dst (LShiftI dst shift));
8444 8448 effect(KILL cr);
8445 8449 
8446 8450 size(3);
8447 8451 format %{ "SHL $dst,$shift" %}
8448 8452 opcode(0xC1, 0x4); /* C1 /4 ib */
8449 8453 ins_encode( RegOpcImm( dst, shift) );
8450 8454 ins_pipe( ialu_reg );
8451 8455 %}
8452 8456 
8453 8457 // Shift Left by variable
8454 8458 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8455 8459 match(Set dst (LShiftI dst shift));
8456 8460 effect(KILL cr);
8457 8461 
8458 8462 size(2);
8459 8463 format %{ "SHL $dst,$shift" %}
8460 8464 opcode(0xD3, 0x4); /* D3 /4 */
8461 8465 ins_encode( OpcP, RegOpc( dst ) );
8462 8466 ins_pipe( ialu_reg_reg );
8463 8467 %}
8464 8468
8465 8469 // Arithmetic shift right by one
// Arithmetic shift-right (SAR, /7) family: register and memory forms, by
// 1 (D1), by imm8 (C1 ib), and by CL (D3). The memory forms fold the
// load-shift-store pattern into a single read-modify-write instruction.
8466 8470 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8467 8471 match(Set dst (RShiftI dst shift));
8468 8472 effect(KILL cr);
8469 8473 
8470 8474 size(2);
8471 8475 format %{ "SAR $dst,$shift" %}
8472 8476 opcode(0xD1, 0x7); /* D1 /7 */
8473 8477 ins_encode( OpcP, RegOpc( dst ) );
8474 8478 ins_pipe( ialu_reg );
8475 8479 %}
8476 8480 
8477 8481 // Arithmetic shift right by one
8478 8482 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8479 8483 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8480 8484 effect(KILL cr);
8481 8485 format %{ "SAR $dst,$shift" %}
8482 8486 opcode(0xD1, 0x7); /* D1 /7 */
8483 8487 ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8484 8488 ins_pipe( ialu_mem_imm );
8485 8489 %}
8486 8490 
8487 8491 // Arithmetic Shift Right by 8-bit immediate
8488 8492 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8489 8493 match(Set dst (RShiftI dst shift));
8490 8494 effect(KILL cr);
8491 8495 
8492 8496 size(3);
8493 8497 format %{ "SAR $dst,$shift" %}
8494 8498 opcode(0xC1, 0x7); /* C1 /7 ib */
8495 8499 ins_encode( RegOpcImm( dst, shift ) );
8496 8500 ins_pipe( ialu_mem_imm );   // NOTE(review): reg form uses the mem pipeline class — looks copy-pasted; confirm
8497 8501 %}
8498 8502 
8499 8503 // Arithmetic Shift Right by 8-bit immediate
8500 8504 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8501 8505 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8502 8506 effect(KILL cr);
8503 8507 
8504 8508 format %{ "SAR $dst,$shift" %}
8505 8509 opcode(0xC1, 0x7); /* C1 /7 ib */
8506 8510 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8507 8511 ins_pipe( ialu_mem_imm );
8508 8512 %}
8509 8513 
8510 8514 // Arithmetic Shift Right by variable
8511 8515 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8512 8516 match(Set dst (RShiftI dst shift));
8513 8517 effect(KILL cr);
8514 8518 
8515 8519 size(2);
8516 8520 format %{ "SAR $dst,$shift" %}
8517 8521 opcode(0xD3, 0x7); /* D3 /7 */
8518 8522 ins_encode( OpcP, RegOpc( dst ) );
8519 8523 ins_pipe( ialu_reg_reg );
8520 8524 %}
8521 8525
8522 8526 // Logical shift right by one
// Logical (zero-filling) shift-right: SHR is the /5 form of the shift
// group — D1 /5 for shift-by-1, C1 /5 ib for shift-by-imm8.
8523 8527 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8524 8528 match(Set dst (URShiftI dst shift));
8525 8529 effect(KILL cr);
8526 8530 
8527 8531 size(2);
8528 8532 format %{ "SHR $dst,$shift" %}
8529 8533 opcode(0xD1, 0x5); /* D1 /5 */
8530 8534 ins_encode( OpcP, RegOpc( dst ) );
8531 8535 ins_pipe( ialu_reg );
8532 8536 %}
8533 8537 
8534 8538 // Logical Shift Right by 8-bit immediate
8535 8539 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8536 8540 match(Set dst (URShiftI dst shift));
8537 8541 effect(KILL cr);
8538 8542 
8539 8543 size(3);
8540 8544 format %{ "SHR $dst,$shift" %}
8541 8545 opcode(0xC1, 0x5); /* C1 /5 ib */
8542 8546 ins_encode( RegOpcImm( dst, shift) );
8543 8547 ins_pipe( ialu_reg );
8544 8548 %}
8545 8549
8546 8550
8547 8551 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8548 8552 // This idiom is used by the compiler for the i2b bytecode.
// i2b: the (x << 24) >> 24 idiom collapses to a single MOVSX byte
// sign-extension. src is constrained to xRegI — presumably the registers
// whose low byte is encodable (MOVSX reads an 8-bit source); confirm the
// operand-class definition earlier in this file.
8549 8553 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
8550 8554 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8551 8555 
8552 8556 size(3);
8553 8557 format %{ "MOVSX $dst,$src :8" %}
8554 8558 ins_encode %{
8555 8559 __ movsbl($dst$$Register, $src$$Register);
8556 8560 %}
8557 8561 ins_pipe(ialu_reg_reg);
8558 8562 %}
8559 8563
8560 8564 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
8561 8565 // This idiom is used by the compiler for the i2s bytecode.
// i2s: the (x << 16) >> 16 idiom collapses to a single MOVSX word
// sign-extension (movswl).
8562 8566 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
8563 8567 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8564 8568 
8565 8569 size(3);
8566 8570 format %{ "MOVSX $dst,$src :16" %}
8567 8571 ins_encode %{
8568 8572 __ movswl($dst$$Register, $src$$Register);
8569 8573 %}
8570 8574 ins_pipe(ialu_reg_reg);
8571 8575 %}
8572 8576
8573 8577
8574 8578 // Logical Shift Right by variable
// Logical shift-right by a variable count: D3 /5, count must be in CL.
8575 8579 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8576 8580 match(Set dst (URShiftI dst shift));
8577 8581 effect(KILL cr);
8578 8582 
8579 8583 size(2);
8580 8584 format %{ "SHR $dst,$shift" %}
8581 8585 opcode(0xD3, 0x5); /* D3 /5 */
8582 8586 ins_encode( OpcP, RegOpc( dst ) );
8583 8587 ins_pipe( ialu_reg_reg );
8584 8588 %}
8585 8589
8586 8590
8587 8591 //----------Logical Instructions-----------------------------------------------
8588 8592 //----------Integer Logical Instructions---------------------------------------
8589 8593 // And Instructions
8590 8594 // And Register with Register
// Integer AND family: reg-reg (0x23 /r), reg-imm (0x81 /4; OpcSErm with
// Con8or32 emits the sign-extended 8-bit immediate form when it fits),
// reg-mem, mem-reg (0x21 /r) and mem-imm read-modify-write forms.
8591 8595 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8592 8596 match(Set dst (AndI dst src));
8593 8597 effect(KILL cr);
8594 8598 
8595 8599 size(2);
8596 8600 format %{ "AND $dst,$src" %}
8597 8601 opcode(0x23);
8598 8602 ins_encode( OpcP, RegReg( dst, src) );
8599 8603 ins_pipe( ialu_reg_reg );
8600 8604 %}
8601 8605 
8602 8606 // And Register with Immediate
8603 8607 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8604 8608 match(Set dst (AndI dst src));
8605 8609 effect(KILL cr);
8606 8610 
8607 8611 format %{ "AND $dst,$src" %}
8608 8612 opcode(0x81,0x04); /* Opcode 81 /4 */
8609 8613 // ins_encode( RegImm( dst, src) );
8610 8614 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8611 8615 ins_pipe( ialu_reg );
8612 8616 %}
8613 8617 
8614 8618 // And Register with Memory
8615 8619 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8616 8620 match(Set dst (AndI dst (LoadI src)));
8617 8621 effect(KILL cr);
8618 8622 
8619 8623 ins_cost(125);
8620 8624 format %{ "AND $dst,$src" %}
8621 8625 opcode(0x23);
8622 8626 ins_encode( OpcP, RegMem( dst, src) );
8623 8627 ins_pipe( ialu_reg_mem );
8624 8628 %}
8625 8629 
8626 8630 // And Memory with Register
8627 8631 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8628 8632 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8629 8633 effect(KILL cr);
8630 8634 
8631 8635 ins_cost(150);
8632 8636 format %{ "AND $dst,$src" %}
8633 8637 opcode(0x21); /* Opcode 21 /r */
8634 8638 ins_encode( OpcP, RegMem( src, dst ) );
8635 8639 ins_pipe( ialu_mem_reg );
8636 8640 %}
8637 8641 
8638 8642 // And Memory with Immediate
8639 8643 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8640 8644 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8641 8645 effect(KILL cr);
8642 8646 
8643 8647 ins_cost(125);
8644 8648 format %{ "AND $dst,$src" %}
8645 8649 opcode(0x81, 0x4); /* Opcode 81 /4 id */
8646 8650 // ins_encode( MemImm( dst, src) );
8647 8651 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8648 8652 ins_pipe( ialu_mem_imm );
8649 8653 %}
8650 8654
8651 8655 // Or Instructions
8652 8656 // Or Register with Register
// Integer OR family, mirroring the AND family above: reg-reg (0x0B /r),
// a CastP2X variant (ORing a pointer's bits, same encoding), reg-imm
// (0x81 /1), reg-mem, mem-reg (0x09 /r) and mem-imm forms.
8653 8657 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8654 8658 match(Set dst (OrI dst src));
8655 8659 effect(KILL cr);
8656 8660 
8657 8661 size(2);
8658 8662 format %{ "OR $dst,$src" %}
8659 8663 opcode(0x0B);
8660 8664 ins_encode( OpcP, RegReg( dst, src) );
8661 8665 ins_pipe( ialu_reg_reg );
8662 8666 %}
8663 8667 
8664 8668 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
8665 8669 match(Set dst (OrI dst (CastP2X src)));
8666 8670 effect(KILL cr);
8667 8671 
8668 8672 size(2);
8669 8673 format %{ "OR $dst,$src" %}
8670 8674 opcode(0x0B);
8671 8675 ins_encode( OpcP, RegReg( dst, src) );
8672 8676 ins_pipe( ialu_reg_reg );
8673 8677 %}
8674 8678 
8675 8679 
8676 8680 // Or Register with Immediate
8677 8681 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8678 8682 match(Set dst (OrI dst src));
8679 8683 effect(KILL cr);
8680 8684 
8681 8685 format %{ "OR $dst,$src" %}
8682 8686 opcode(0x81,0x01); /* Opcode 81 /1 id */
8683 8687 // ins_encode( RegImm( dst, src) );
8684 8688 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8685 8689 ins_pipe( ialu_reg );
8686 8690 %}
8687 8691 
8688 8692 // Or Register with Memory
8689 8693 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8690 8694 match(Set dst (OrI dst (LoadI src)));
8691 8695 effect(KILL cr);
8692 8696 
8693 8697 ins_cost(125);
8694 8698 format %{ "OR $dst,$src" %}
8695 8699 opcode(0x0B);
8696 8700 ins_encode( OpcP, RegMem( dst, src) );
8697 8701 ins_pipe( ialu_reg_mem );
8698 8702 %}
8699 8703 
8700 8704 // Or Memory with Register
8701 8705 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8702 8706 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8703 8707 effect(KILL cr);
8704 8708 
8705 8709 ins_cost(150);
8706 8710 format %{ "OR $dst,$src" %}
8707 8711 opcode(0x09); /* Opcode 09 /r */
8708 8712 ins_encode( OpcP, RegMem( src, dst ) );
8709 8713 ins_pipe( ialu_mem_reg );
8710 8714 %}
8711 8715 
8712 8716 // Or Memory with Immediate
8713 8717 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8714 8718 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8715 8719 effect(KILL cr);
8716 8720 
8717 8721 ins_cost(125);
8718 8722 format %{ "OR $dst,$src" %}
8719 8723 opcode(0x81,0x1); /* Opcode 81 /1 id */
8720 8724 // ins_encode( MemImm( dst, src) );
8721 8725 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8722 8726 ins_pipe( ialu_mem_imm );
8723 8727 %}
8724 8728
8725 8729 // ROL/ROR
8726 8730 // ROL expand
// ROL: expand-only templates first (no match rule — only reachable via the
// expand %{ %} blocks below), then matcher rules that recognize the rotate
// idiom (x << s) | (x >>> (32-s)). The imm8 rule's predicate checks that
// the two shift constants sum to 0 mod 32, i.e. a genuine rotate. ROL is
// the /0 form of the D1/C1/D3 shift group.
8727 8731 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8728 8732 effect(USE_DEF dst, USE shift, KILL cr);
8729 8733 
8730 8734 format %{ "ROL $dst, $shift" %}
8731 8735 opcode(0xD1, 0x0); /* Opcode D1 /0 */
8732 8736 ins_encode( OpcP, RegOpc( dst ));
8733 8737 ins_pipe( ialu_reg );
8734 8738 %}
8735 8739 
8736 8740 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
8737 8741 effect(USE_DEF dst, USE shift, KILL cr);
8738 8742 
8739 8743 format %{ "ROL $dst, $shift" %}
8740 8744 opcode(0xC1, 0x0); /*Opcode /C1 /0 */
8741 8745 ins_encode( RegOpcImm(dst, shift) );
8742 8746 ins_pipe(ialu_reg);
8743 8747 %}
8744 8748 
8745 8749 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8746 8750 effect(USE_DEF dst, USE shift, KILL cr);
8747 8751 
8748 8752 format %{ "ROL $dst, $shift" %}
8749 8753 opcode(0xD3, 0x0); /* Opcode D3 /0 */
8750 8754 ins_encode(OpcP, RegOpc(dst));
8751 8755 ins_pipe( ialu_reg_reg );
8752 8756 %}
8753 8757 // end of ROL expand
8754 8758 
8755 8759 // ROL 32bit by one once
8756 8760 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8757 8761 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8758 8762 
8759 8763 expand %{
8760 8764 rolI_eReg_imm1(dst, lshift, cr);
8761 8765 %}
8762 8766 %}
8763 8767 
8764 8768 // ROL 32bit var by imm8 once
8765 8769 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8766 8770 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8767 8771 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8768 8772 
8769 8773 expand %{
8770 8774 rolI_eReg_imm8(dst, lshift, cr);
8771 8775 %}
8772 8776 %}
8773 8777 
8774 8778 // ROL 32bit var by var once
8775 8779 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8776 8780 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8777 8781 
8778 8782 expand %{
8779 8783 rolI_eReg_CL(dst, shift, cr);
8780 8784 %}
8781 8785 %}
8782 8786 
8783 8787 // ROL 32bit var by var once
8784 8788 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8785 8789 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8786 8790 
8787 8791 expand %{
8788 8792 rolI_eReg_CL(dst, shift, cr);
8789 8793 %}
8790 8794 %}
8791 8795
8792 8796 // ROR expand
// ROR: mirror image of the ROL section above — expand-only templates
// (ROR is the /1 form of the D1/C1/D3 shift group) followed by matcher
// rules for the (x >>> s) | (x << (32-s)) rotate idiom.
8793 8797 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8794 8798 effect(USE_DEF dst, USE shift, KILL cr);
8795 8799 
8796 8800 format %{ "ROR $dst, $shift" %}
8797 8801 opcode(0xD1,0x1); /* Opcode D1 /1 */
8798 8802 ins_encode( OpcP, RegOpc( dst ) );
8799 8803 ins_pipe( ialu_reg );
8800 8804 %}
8801 8805 
8802 8806 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
8803 8807 effect (USE_DEF dst, USE shift, KILL cr);
8804 8808 
8805 8809 format %{ "ROR $dst, $shift" %}
8806 8810 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8807 8811 ins_encode( RegOpcImm(dst, shift) );
8808 8812 ins_pipe( ialu_reg );
8809 8813 %}
8810 8814 
8811 8815 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8812 8816 effect(USE_DEF dst, USE shift, KILL cr);
8813 8817 
8814 8818 format %{ "ROR $dst, $shift" %}
8815 8819 opcode(0xD3, 0x1); /* Opcode D3 /1 */
8816 8820 ins_encode(OpcP, RegOpc(dst));
8817 8821 ins_pipe( ialu_reg_reg );
8818 8822 %}
8819 8823 // end of ROR expand
8820 8824 
8821 8825 // ROR right once
8822 8826 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8823 8827 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8824 8828 
8825 8829 expand %{
8826 8830 rorI_eReg_imm1(dst, rshift, cr);
8827 8831 %}
8828 8832 %}
8829 8833 
8830 8834 // ROR 32bit by immI8 once
8831 8835 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8832 8836 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8833 8837 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8834 8838 
8835 8839 expand %{
8836 8840 rorI_eReg_imm8(dst, rshift, cr);
8837 8841 %}
8838 8842 %}
8839 8843 
8840 8844 // ROR 32bit var by var once
8841 8845 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8842 8846 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8843 8847 
8844 8848 expand %{
8845 8849 rorI_eReg_CL(dst, shift, cr);
8846 8850 %}
8847 8851 %}
8848 8852 
8849 8853 // ROR 32bit var by var once
8850 8854 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8851 8855 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8852 8856 
8853 8857 expand %{
8854 8858 rorI_eReg_CL(dst, shift, cr);
8855 8859 %}
8856 8860 %}
8857 8861
8858 8862 // Xor Instructions
8859 8863 // Xor Register with Register
// Integer XOR family. XOR with constant -1 is strength-reduced to the
// shorter NOT; note that form declares no KILL cr, since NOT does not
// modify EFLAGS.
8860 8864 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8861 8865 match(Set dst (XorI dst src));
8862 8866 effect(KILL cr);
8863 8867 
8864 8868 size(2);
8865 8869 format %{ "XOR $dst,$src" %}
8866 8870 opcode(0x33);
8867 8871 ins_encode( OpcP, RegReg( dst, src) );
8868 8872 ins_pipe( ialu_reg_reg );
8869 8873 %}
8870 8874 
8871 8875 // Xor Register with Immediate -1
8872 8876 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
8873 8877 match(Set dst (XorI dst imm));
8874 8878 
8875 8879 size(2);
8876 8880 format %{ "NOT $dst" %}
8877 8881 ins_encode %{
8878 8882 __ notl($dst$$Register);
8879 8883 %}
8880 8884 ins_pipe( ialu_reg );
8881 8885 %}
8882 8886 
8883 8887 // Xor Register with Immediate
8884 8888 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8885 8889 match(Set dst (XorI dst src));
8886 8890 effect(KILL cr);
8887 8891 
8888 8892 format %{ "XOR $dst,$src" %}
8889 8893 opcode(0x81,0x06); /* Opcode 81 /6 id */
8890 8894 // ins_encode( RegImm( dst, src) );
8891 8895 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8892 8896 ins_pipe( ialu_reg );
8893 8897 %}
8894 8898 
8895 8899 // Xor Register with Memory
8896 8900 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8897 8901 match(Set dst (XorI dst (LoadI src)));
8898 8902 effect(KILL cr);
8899 8903 
8900 8904 ins_cost(125);
8901 8905 format %{ "XOR $dst,$src" %}
8902 8906 opcode(0x33);
8903 8907 ins_encode( OpcP, RegMem(dst, src) );
8904 8908 ins_pipe( ialu_reg_mem );
8905 8909 %}
8906 8910 
8907 8911 // Xor Memory with Register
8908 8912 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8909 8913 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8910 8914 effect(KILL cr);
8911 8915 
8912 8916 ins_cost(150);
8913 8917 format %{ "XOR $dst,$src" %}
8914 8918 opcode(0x31); /* Opcode 31 /r */
8915 8919 ins_encode( OpcP, RegMem( src, dst ) );
8916 8920 ins_pipe( ialu_mem_reg );
8917 8921 %}
8918 8922 
8919 8923 // Xor Memory with Immediate
8920 8924 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8921 8925 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8922 8926 effect(KILL cr);
8923 8927 
8924 8928 ins_cost(125);
8925 8929 format %{ "XOR $dst,$src" %}
8926 8930 opcode(0x81,0x6); /* Opcode 81 /6 id */
8927 8931 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8928 8932 ins_pipe( ialu_mem_imm );
8929 8933 %}
8930 8934
8931 8935 //----------Convert Int to Boolean---------------------------------------------
8932 8936
// Conv2B (int or pointer -> 0/1). Each conv expands to a copy plus the
// NEG/ADC trick: with dst == src on entry, NEG dst sets CF iff src != 0,
// then ADC dst,src computes (-src) + src + CF == CF, i.e. (src != 0) ? 1 : 0.
8933 8937 instruct movI_nocopy(eRegI dst, eRegI src) %{
8934 8938 effect( DEF dst, USE src );
8935 8939 format %{ "MOV $dst,$src" %}
8936 8940 ins_encode( enc_Copy( dst, src) );
8937 8941 ins_pipe( ialu_reg_reg );
8938 8942 %}
8939 8943 
8940 8944 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
8941 8945 effect( USE_DEF dst, USE src, KILL cr );
8942 8946 
8943 8947 size(4);
8944 8948 format %{ "NEG $dst\n\t"
8945 8949 "ADC $dst,$src" %}
8946 8950 ins_encode( neg_reg(dst),
8947 8951 OpcRegReg(0x13,dst,src) );
8948 8952 ins_pipe( ialu_reg_reg_long );
8949 8953 %}
8950 8954 
8951 8955 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
8952 8956 match(Set dst (Conv2B src));
8953 8957 
8954 8958 expand %{
8955 8959 movI_nocopy(dst,src);
8956 8960 ci2b(dst,src,cr);
8957 8961 %}
8958 8962 %}
8959 8963 
8960 8964 instruct movP_nocopy(eRegI dst, eRegP src) %{
8961 8965 effect( DEF dst, USE src );
8962 8966 format %{ "MOV $dst,$src" %}
8963 8967 ins_encode( enc_Copy( dst, src) );
8964 8968 ins_pipe( ialu_reg_reg );
8965 8969 %}
8966 8970 
8967 8971 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
8968 8972 effect( USE_DEF dst, USE src, KILL cr );
8969 8973 format %{ "NEG $dst\n\t"
8970 8974 "ADC $dst,$src" %}
8971 8975 ins_encode( neg_reg(dst),
8972 8976 OpcRegReg(0x13,dst,src) );
8973 8977 ins_pipe( ialu_reg_reg_long );
8974 8978 %}
8975 8979 
8976 8980 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
8977 8981 match(Set dst (Conv2B src));
8978 8982 
8979 8983 expand %{
8980 8984 movP_nocopy(dst,src);
8981 8985 cp2b(dst,src,cr);
8982 8986 %}
8983 8987 %}
8984 8988
// CmpLTMask: dst = (p < q) ? -1 : 0, via CMP/SETlt/NEG. The compare-with-
// zero form reduces to SAR dst,31, which replicates the sign bit across
// the register. cadd_cmpLTMask fuses (p - q) + ((p < q) ? y : 0) into
// SUB/SBB/AND/ADD: SBB ECX,ECX turns the borrow of the SUB into an
// all-ones/all-zeros mask.
8985 8989 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
8986 8990 match(Set dst (CmpLTMask p q));
8987 8991 effect( KILL cr );
8988 8992 ins_cost(400);
8989 8993 
8990 8994 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8991 8995 format %{ "XOR $dst,$dst\n\t"
8992 8996 "CMP $p,$q\n\t"
8993 8997 "SETlt $dst\n\t"
8994 8998 "NEG $dst" %}
8995 8999 ins_encode( OpcRegReg(0x33,dst,dst),
8996 9000 OpcRegReg(0x3B,p,q),
8997 9001 setLT_reg(dst), neg_reg(dst) );
8998 9002 ins_pipe( pipe_slow );
8999 9003 %}
9000 9004 
9001 9005 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9002 9006 match(Set dst (CmpLTMask dst zero));
9003 9007 effect( DEF dst, KILL cr );
9004 9008 ins_cost(100);
9005 9009 
9006 9010 format %{ "SAR $dst,31" %}
9007 9011 opcode(0xC1, 0x7); /* C1 /7 ib */
9008 9012 ins_encode( RegOpcImm( dst, 0x1F ) );
9009 9013 ins_pipe( ialu_reg );
9010 9014 %}
9011 9015 
9012 9016 
9013 9017 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9014 9018 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9015 9019 effect( KILL tmp, KILL cr );
9016 9020 ins_cost(400);
9017 9021 // annoyingly, $tmp has no edges so you can't ask for it in
9018 9022 // any format or encoding
9019 9023 format %{ "SUB $p,$q\n\t"
9020 9024 "SBB ECX,ECX\n\t"
9021 9025 "AND ECX,$y\n\t"
9022 9026 "ADD $p,ECX" %}
9023 9027 ins_encode( enc_cmpLTP(p,q,y,tmp) );
9024 9028 ins_pipe( pipe_cmplt );
9025 9029 %}
9026 9030
9027 9031 /* If I enable this, I encourage spilling in the inner loop of compress.
9028 9032 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9029 9033 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9030 9034 effect( USE_KILL tmp, KILL cr );
9031 9035 ins_cost(400);
9032 9036
9033 9037 format %{ "SUB $p,$q\n\t"
9034 9038 "SBB ECX,ECX\n\t"
9035 9039 "AND ECX,$y\n\t"
9036 9040 "ADD $p,ECX" %}
9037 9041 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9038 9042 %}
9039 9043 */
9040 9044
9041 9045 //----------Long Instructions------------------------------------------------
9042 9046 // Add Long Register with Register
// 64-bit add/sub on 32-bit x86: the low halves use ADD/SUB and the high
// halves use the carry-propagating ADC/SBB (opcode pairs 03/13 and 2B/1B
// for reg forms; 81 /0 + 81 /2 and 81 /5 + 81 /3 for immediates). The
// memory forms read the low word at $mem and the high word at $mem+4.
9043 9047 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9044 9048 match(Set dst (AddL dst src));
9045 9049 effect(KILL cr);
9046 9050 ins_cost(200);
9047 9051 format %{ "ADD $dst.lo,$src.lo\n\t"
9048 9052 "ADC $dst.hi,$src.hi" %}
9049 9053 opcode(0x03, 0x13);
9050 9054 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9051 9055 ins_pipe( ialu_reg_reg_long );
9052 9056 %}
9053 9057 
9054 9058 // Add Long Register with Immediate
9055 9059 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9056 9060 match(Set dst (AddL dst src));
9057 9061 effect(KILL cr);
9058 9062 format %{ "ADD $dst.lo,$src.lo\n\t"
9059 9063 "ADC $dst.hi,$src.hi" %}
9060 9064 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9061 9065 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9062 9066 ins_pipe( ialu_reg_long );
9063 9067 %}
9064 9068 
9065 9069 // Add Long Register with Memory
9066 9070 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9067 9071 match(Set dst (AddL dst (LoadL mem)));
9068 9072 effect(KILL cr);
9069 9073 ins_cost(125);
9070 9074 format %{ "ADD $dst.lo,$mem\n\t"
9071 9075 "ADC $dst.hi,$mem+4" %}
9072 9076 opcode(0x03, 0x13);
9073 9077 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9074 9078 ins_pipe( ialu_reg_long_mem );
9075 9079 %}
9076 9080 
9077 9081 // Subtract Long Register with Register.
9078 9082 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9079 9083 match(Set dst (SubL dst src));
9080 9084 effect(KILL cr);
9081 9085 ins_cost(200);
9082 9086 format %{ "SUB $dst.lo,$src.lo\n\t"
9083 9087 "SBB $dst.hi,$src.hi" %}
9084 9088 opcode(0x2B, 0x1B);
9085 9089 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9086 9090 ins_pipe( ialu_reg_reg_long );
9087 9091 %}
9088 9092 
9089 9093 // Subtract Long Register with Immediate
9090 9094 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9091 9095 match(Set dst (SubL dst src));
9092 9096 effect(KILL cr);
9093 9097 format %{ "SUB $dst.lo,$src.lo\n\t"
9094 9098 "SBB $dst.hi,$src.hi" %}
9095 9099 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9096 9100 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9097 9101 ins_pipe( ialu_reg_long );
9098 9102 %}
9099 9103 
9100 9104 // Subtract Long Register with Memory
9101 9105 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9102 9106 match(Set dst (SubL dst (LoadL mem)));
9103 9107 effect(KILL cr);
9104 9108 ins_cost(125);
9105 9109 format %{ "SUB $dst.lo,$mem\n\t"
9106 9110 "SBB $dst.hi,$mem+4" %}
9107 9111 opcode(0x2B, 0x1B);
9108 9112 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9109 9113 ins_pipe( ialu_reg_long_mem );
9110 9114 %}
9111 9115
// Long negate (0 - dst): negate both halves, then SBB the borrow produced
// by negating the low half out of the high half (see neg_long encoding).
9112 9116 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9113 9117 match(Set dst (SubL zero dst));
9114 9118 effect(KILL cr);
9115 9119 ins_cost(300);
9116 9120 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9117 9121 ins_encode( neg_long(dst) );
9118 9122 ins_pipe( ialu_reg_reg_long );
9119 9123 %}
9120 9124
9121 9125 // And Long Register with Register
// Long AND: bitwise ops have no cross-half carry, so the low and high
// 32-bit halves are simply ANDed independently (0x23 /r; 81 /4 for imm).
9122 9126 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9123 9127 match(Set dst (AndL dst src));
9124 9128 effect(KILL cr);
9125 9129 format %{ "AND $dst.lo,$src.lo\n\t"
9126 9130 "AND $dst.hi,$src.hi" %}
9127 9131 opcode(0x23,0x23);
9128 9132 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9129 9133 ins_pipe( ialu_reg_reg_long );
9130 9134 %}
9131 9135 
9132 9136 // And Long Register with Immediate
9133 9137 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9134 9138 match(Set dst (AndL dst src));
9135 9139 effect(KILL cr);
9136 9140 format %{ "AND $dst.lo,$src.lo\n\t"
9137 9141 "AND $dst.hi,$src.hi" %}
9138 9142 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9139 9143 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9140 9144 ins_pipe( ialu_reg_long );
9141 9145 %}
9142 9146 
9143 9147 // And Long Register with Memory
9144 9148 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9145 9149 match(Set dst (AndL dst (LoadL mem)));
9146 9150 effect(KILL cr);
9147 9151 ins_cost(125);
9148 9152 format %{ "AND $dst.lo,$mem\n\t"
9149 9153 "AND $dst.hi,$mem+4" %}
9150 9154 opcode(0x23, 0x23);
9151 9155 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9152 9156 ins_pipe( ialu_reg_long_mem );
9153 9157 %}
9154 9158
9155 9159 // Or Long Register with Register
// Long OR: like long AND, each 32-bit half is ORed independently
// (0x0B /r for reg forms; 81 /1 for immediates).
9156 9160 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9157 9161 match(Set dst (OrL dst src));
9158 9162 effect(KILL cr);
9159 9163 format %{ "OR $dst.lo,$src.lo\n\t"
9160 9164 "OR $dst.hi,$src.hi" %}
9161 9165 opcode(0x0B,0x0B);
9162 9166 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9163 9167 ins_pipe( ialu_reg_reg_long );
9164 9168 %}
9165 9169 
9166 9170 // Or Long Register with Immediate
9167 9171 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9168 9172 match(Set dst (OrL dst src));
9169 9173 effect(KILL cr);
9170 9174 format %{ "OR $dst.lo,$src.lo\n\t"
9171 9175 "OR $dst.hi,$src.hi" %}
9172 9176 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9173 9177 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9174 9178 ins_pipe( ialu_reg_long );
9175 9179 %}
9176 9180 
9177 9181 // Or Long Register with Memory
9178 9182 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9179 9183 match(Set dst (OrL dst (LoadL mem)));
9180 9184 effect(KILL cr);
9181 9185 ins_cost(125);
9182 9186 format %{ "OR $dst.lo,$mem\n\t"
9183 9187 "OR $dst.hi,$mem+4" %}
9184 9188 opcode(0x0B,0x0B);
9185 9189 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9186 9190 ins_pipe( ialu_reg_long_mem );
9187 9191 %}
9188 9192
9189 9193 // Xor Long Register with Register
9190 9194 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9191 9195 match(Set dst (XorL dst src));
9192 9196 effect(KILL cr);
9193 9197 format %{ "XOR $dst.lo,$src.lo\n\t"
9194 9198 "XOR $dst.hi,$src.hi" %}
9195 9199 opcode(0x33,0x33);
9196 9200 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9197 9201 ins_pipe( ialu_reg_reg_long );
9198 9202 %}
9199 9203
9200 9204 // Xor Long Register with Immediate -1
9201 9205 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9202 9206 match(Set dst (XorL dst imm));
9203 9207 format %{ "NOT $dst.lo\n\t"
9204 9208 "NOT $dst.hi" %}
9205 9209 ins_encode %{
9206 9210 __ notl($dst$$Register);
9207 9211 __ notl(HIGH_FROM_LOW($dst$$Register));
9208 9212 %}
9209 9213 ins_pipe( ialu_reg_long );
9210 9214 %}
9211 9215
9212 9216 // Xor Long Register with Immediate
9213 9217 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9214 9218 match(Set dst (XorL dst src));
9215 9219 effect(KILL cr);
9216 9220 format %{ "XOR $dst.lo,$src.lo\n\t"
9217 9221 "XOR $dst.hi,$src.hi" %}
9218 9222 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9219 9223 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9220 9224 ins_pipe( ialu_reg_long );
9221 9225 %}
9222 9226
9223 9227 // Xor Long Register with Memory
9224 9228 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9225 9229 match(Set dst (XorL dst (LoadL mem)));
9226 9230 effect(KILL cr);
9227 9231 ins_cost(125);
9228 9232 format %{ "XOR $dst.lo,$mem\n\t"
9229 9233 "XOR $dst.hi,$mem+4" %}
9230 9234 opcode(0x33,0x33);
9231 9235 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9232 9236 ins_pipe( ialu_reg_long_mem );
9233 9237 %}
9234 9238
// ---- 64-bit shifts on 32-bit x86 ---------------------------------------
// Strategy by shift amount:
//   * 1..3 (UseNewLongLShift only): strength-reduce left shift to repeated
//     ADD/ADC pairs, which propagate the carry from the low into the high word.
//   * constant 1..31: SHLD/SHRD moves bits across the word boundary, then an
//     ordinary shift finishes the donor half.
//   * constant 32..63: move one half into the other, shift by (cnt-32), and
//     fill the vacated half (zero for logical shifts, sign copies for SAR).
//   * variable (in ECX): test bit 5 of the count, conditionally pre-move the
//     halves, then do the SHLD/SHRD + shift pair; emitted by the named
//     encoding blocks (shift_left_long etc.) declared elsewhere in this file.
// All of them KILL EFLAGS.
9235 9239 // Shift Left Long by 1
9236 9240 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9237 9241 predicate(UseNewLongLShift);
9238 9242 match(Set dst (LShiftL dst cnt));
9239 9243 effect(KILL cr);
9240 9244 ins_cost(100);
9241 9245 format %{ "ADD $dst.lo,$dst.lo\n\t"
9242 9246 "ADC $dst.hi,$dst.hi" %}
9243 9247 ins_encode %{
9244 9248 __ addl($dst$$Register,$dst$$Register);
9245 9249 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9246 9250 %}
9247 9251 ins_pipe( ialu_reg_long );
9248 9252 %}
9249 9253
9250 9254 // Shift Left Long by 2
9251 9255 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9252 9256 predicate(UseNewLongLShift);
9253 9257 match(Set dst (LShiftL dst cnt));
9254 9258 effect(KILL cr);
9255 9259 ins_cost(100);
9256 9260 format %{ "ADD $dst.lo,$dst.lo\n\t"
9257 9261 "ADC $dst.hi,$dst.hi\n\t"
9258 9262 "ADD $dst.lo,$dst.lo\n\t"
9259 9263 "ADC $dst.hi,$dst.hi" %}
9260 9264 ins_encode %{
9261 9265 __ addl($dst$$Register,$dst$$Register);
9262 9266 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9263 9267 __ addl($dst$$Register,$dst$$Register);
9264 9268 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9265 9269 %}
9266 9270 ins_pipe( ialu_reg_long );
9267 9271 %}
9268 9272
9269 9273 // Shift Left Long by 3
9270 9274 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9271 9275 predicate(UseNewLongLShift);
9272 9276 match(Set dst (LShiftL dst cnt));
9273 9277 effect(KILL cr);
9274 9278 ins_cost(100);
9275 9279 format %{ "ADD $dst.lo,$dst.lo\n\t"
9276 9280 "ADC $dst.hi,$dst.hi\n\t"
9277 9281 "ADD $dst.lo,$dst.lo\n\t"
9278 9282 "ADC $dst.hi,$dst.hi\n\t"
9279 9283 "ADD $dst.lo,$dst.lo\n\t"
9280 9284 "ADC $dst.hi,$dst.hi" %}
9281 9285 ins_encode %{
9282 9286 __ addl($dst$$Register,$dst$$Register);
9283 9287 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9284 9288 __ addl($dst$$Register,$dst$$Register);
9285 9289 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9286 9290 __ addl($dst$$Register,$dst$$Register);
9287 9291 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9288 9292 %}
9289 9293 ins_pipe( ialu_reg_long );
9290 9294 %}
9291 9295
9292 9296 // Shift Left Long by 1-31
9293 9297 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9294 9298 match(Set dst (LShiftL dst cnt));
9295 9299 effect(KILL cr);
9296 9300 ins_cost(200);
9297 9301 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9298 9302 "SHL $dst.lo,$cnt" %}
9299 9303 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9300 9304 ins_encode( move_long_small_shift(dst,cnt) );
9301 9305 ins_pipe( ialu_reg_long );
9302 9306 %}
9303 9307
9304 9308 // Shift Left Long by 32-63
9305 9309 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9306 9310 match(Set dst (LShiftL dst cnt));
9307 9311 effect(KILL cr);
9308 9312 ins_cost(300);
9309 9313 format %{ "MOV $dst.hi,$dst.lo\n"
9310 9314 "\tSHL $dst.hi,$cnt-32\n"
9311 9315 "\tXOR $dst.lo,$dst.lo" %}
9312 9316 opcode(0xC1, 0x4); /* C1 /4 ib */
9313 9317 ins_encode( move_long_big_shift_clr(dst,cnt) );
9314 9318 ins_pipe( ialu_reg_long );
9315 9319 %}
9316 9320
// NOTE(review): size(17)/size(18) below are fixed emitted-byte counts that
// must match the corresponding shift_* encoding blocks exactly — verify
// against those encodings before changing either side.
9317 9321 // Shift Left Long by variable
9318 9322 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9319 9323 match(Set dst (LShiftL dst shift));
9320 9324 effect(KILL cr);
9321 9325 ins_cost(500+200);
9322 9326 size(17);
9323 9327 format %{ "TEST $shift,32\n\t"
9324 9328 "JEQ,s small\n\t"
9325 9329 "MOV $dst.hi,$dst.lo\n\t"
9326 9330 "XOR $dst.lo,$dst.lo\n"
9327 9331 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
9328 9332 "SHL $dst.lo,$shift" %}
9329 9333 ins_encode( shift_left_long( dst, shift ) );
9330 9334 ins_pipe( pipe_slow );
9331 9335 %}
9332 9336
9333 9337 // Shift Right Long by 1-31
9334 9338 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9335 9339 match(Set dst (URShiftL dst cnt));
9336 9340 effect(KILL cr);
9337 9341 ins_cost(200);
9338 9342 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9339 9343 "SHR $dst.hi,$cnt" %}
9340 9344 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9341 9345 ins_encode( move_long_small_shift(dst,cnt) );
9342 9346 ins_pipe( ialu_reg_long );
9343 9347 %}
9344 9348
9345 9349 // Shift Right Long by 32-63
9346 9350 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9347 9351 match(Set dst (URShiftL dst cnt));
9348 9352 effect(KILL cr);
9349 9353 ins_cost(300);
9350 9354 format %{ "MOV $dst.lo,$dst.hi\n"
9351 9355 "\tSHR $dst.lo,$cnt-32\n"
9352 9356 "\tXOR $dst.hi,$dst.hi" %}
9353 9357 opcode(0xC1, 0x5); /* C1 /5 ib */
9354 9358 ins_encode( move_long_big_shift_clr(dst,cnt) );
9355 9359 ins_pipe( ialu_reg_long );
9356 9360 %}
9357 9361
9358 9362 // Shift Right Long by variable
9359 9363 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9360 9364 match(Set dst (URShiftL dst shift));
9361 9365 effect(KILL cr);
9362 9366 ins_cost(600);
9363 9367 size(17);
9364 9368 format %{ "TEST $shift,32\n\t"
9365 9369 "JEQ,s small\n\t"
9366 9370 "MOV $dst.lo,$dst.hi\n\t"
9367 9371 "XOR $dst.hi,$dst.hi\n"
9368 9372 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9369 9373 "SHR $dst.hi,$shift" %}
9370 9374 ins_encode( shift_right_long( dst, shift ) );
9371 9375 ins_pipe( pipe_slow );
9372 9376 %}
9373 9377
// Arithmetic (sign-propagating) right shifts: high half is filled with
// copies of the sign bit via "SAR ...,31" rather than zeroed.
9374 9378 // Shift Right Long by 1-31
9375 9379 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9376 9380 match(Set dst (RShiftL dst cnt));
9377 9381 effect(KILL cr);
9378 9382 ins_cost(200);
9379 9383 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9380 9384 "SAR $dst.hi,$cnt" %}
9381 9385 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9382 9386 ins_encode( move_long_small_shift(dst,cnt) );
9383 9387 ins_pipe( ialu_reg_long );
9384 9388 %}
9385 9389
9386 9390 // Shift Right Long by 32-63
9387 9391 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9388 9392 match(Set dst (RShiftL dst cnt));
9389 9393 effect(KILL cr);
9390 9394 ins_cost(300);
9391 9395 format %{ "MOV $dst.lo,$dst.hi\n"
9392 9396 "\tSAR $dst.lo,$cnt-32\n"
9393 9397 "\tSAR $dst.hi,31" %}
9394 9398 opcode(0xC1, 0x7); /* C1 /7 ib */
9395 9399 ins_encode( move_long_big_shift_sign(dst,cnt) );
9396 9400 ins_pipe( ialu_reg_long );
9397 9401 %}
9398 9402
9399 9403 // Shift Right arithmetic Long by variable
9400 9404 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9401 9405 match(Set dst (RShiftL dst shift));
9402 9406 effect(KILL cr);
9403 9407 ins_cost(600);
9404 9408 size(18);
9405 9409 format %{ "TEST $shift,32\n\t"
9406 9410 "JEQ,s small\n\t"
9407 9411 "MOV $dst.lo,$dst.hi\n\t"
9408 9412 "SAR $dst.hi,31\n"
9409 9413 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9410 9414 "SAR $dst.hi,$shift" %}
9411 9415 ins_encode( shift_right_arith_long( dst, shift ) );
9412 9416 ins_pipe( pipe_slow );
9413 9417 %}
9414 9418
9415 9419
9416 9420 //----------Double Instructions------------------------------------------------
9417 9421 // Double Math
9418 9422
9419 9423 // Compare & branch
9420 9424
// ---- Double compares ----------------------------------------------------
// Three generations of code:
//   * x87 + CMOV-capable CPU (P6): FUCOMIP sets EFLAGS directly; a fixup
//     sequence forces CF on unordered (NaN) results for the unsigned-flags
//     user (cr variants), or is skipped for the CF-only user (CF variants).
//   * plain x87: FCOM + FNSTSW AX + SAHF, hence the KILL of EAX.
//   * SSE2: UCOMISD, with an analogous NaN fixup emitted by
//     emit_cmpfp_fixup(); CmpD3 (-1/0/1) variants use emit_cmpfp3().
9421 9425 // P6 version of float compare, sets condition codes in EFLAGS
9422 9426 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9423 9427 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9424 9428 match(Set cr (CmpD src1 src2));
9425 9429 effect(KILL rax);
9426 9430 ins_cost(150);
9427 9431 format %{ "FLD $src1\n\t"
9428 9432 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9429 9433 "JNP exit\n\t"
9430 9434 "MOV ah,1 // saw a NaN, set CF\n\t"
9431 9435 "SAHF\n"
9432 9436 "exit:\tNOP // avoid branch to branch" %}
9433 9437 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9434 9438 ins_encode( Push_Reg_DPR(src1),
9435 9439 OpcP, RegOpc(src2),
9436 9440 cmpF_P6_fixup );
9437 9441 ins_pipe( pipe_slow );
9438 9442 %}
9439 9443
// CF-only flags user: no NaN fixup needed, so no EAX kill either.
9440 9444 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9441 9445 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9442 9446 match(Set cr (CmpD src1 src2));
9443 9447 ins_cost(150);
9444 9448 format %{ "FLD $src1\n\t"
9445 9449 "FUCOMIP ST,$src2 // P6 instruction" %}
9446 9450 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9447 9451 ins_encode( Push_Reg_DPR(src1),
9448 9452 OpcP, RegOpc(src2));
9449 9453 ins_pipe( pipe_slow );
9450 9454 %}
9451 9455
// Pre-P6 x87 path: status word travels through AX, hence KILL rax.
9452 9456 // Compare & branch
9453 9457 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9454 9458 predicate(UseSSE<=1);
9455 9459 match(Set cr (CmpD src1 src2));
9456 9460 effect(KILL rax);
9457 9461 ins_cost(200);
9458 9462 format %{ "FLD $src1\n\t"
9459 9463 "FCOMp $src2\n\t"
9460 9464 "FNSTSW AX\n\t"
9461 9465 "TEST AX,0x400\n\t"
9462 9466 "JZ,s flags\n\t"
9463 9467 "MOV AH,1\t# unordered treat as LT\n"
9464 9468 "flags:\tSAHF" %}
9465 9469 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9466 9470 ins_encode( Push_Reg_DPR(src1),
9467 9471 OpcP, RegOpc(src2),
9468 9472 fpu_flags);
9469 9473 ins_pipe( pipe_slow );
9470 9474 %}
9471 9475
9472 9476 // Compare vs zero into -1,0,1
9473 9477 instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9474 9478 predicate(UseSSE<=1);
9475 9479 match(Set dst (CmpD3 src1 zero));
9476 9480 effect(KILL cr, KILL rax);
9477 9481 ins_cost(280);
9478 9482 format %{ "FTSTD $dst,$src1" %}
9479 9483 opcode(0xE4, 0xD9);
9480 9484 ins_encode( Push_Reg_DPR(src1),
9481 9485 OpcS, OpcP, PopFPU,
9482 9486 CmpF_Result(dst));
9483 9487 ins_pipe( pipe_slow );
9484 9488 %}
9485 9489
9486 9490 // Compare into -1,0,1
9487 9491 instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9488 9492 predicate(UseSSE<=1);
9489 9493 match(Set dst (CmpD3 src1 src2));
9490 9494 effect(KILL cr, KILL rax);
9491 9495 ins_cost(300);
9492 9496 format %{ "FCMPD $dst,$src1,$src2" %}
9493 9497 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9494 9498 ins_encode( Push_Reg_DPR(src1),
9495 9499 OpcP, RegOpc(src2),
9496 9500 CmpF_Result(dst));
9497 9501 ins_pipe( pipe_slow );
9498 9502 %}
9499 9503
9500 9504 // float compare and set condition codes in EFLAGS by XMM regs
9501 9505 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9502 9506 predicate(UseSSE>=2);
9503 9507 match(Set cr (CmpD src1 src2));
9504 9508 ins_cost(145);
9505 9509 format %{ "UCOMISD $src1,$src2\n\t"
9506 9510 "JNP,s exit\n\t"
9507 9511 "PUSHF\t# saw NaN, set CF\n\t"
9508 9512 "AND [rsp], #0xffffff2b\n\t"
9509 9513 "POPF\n"
9510 9514 "exit:" %}
9511 9515 ins_encode %{
9512 9516 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9513 9517 emit_cmpfp_fixup(_masm);
9514 9518 %}
9515 9519 ins_pipe( pipe_slow );
9516 9520 %}
9517 9521
9518 9522 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9519 9523 predicate(UseSSE>=2);
9520 9524 match(Set cr (CmpD src1 src2));
9521 9525 ins_cost(100);
9522 9526 format %{ "UCOMISD $src1,$src2" %}
9523 9527 ins_encode %{
9524 9528 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9525 9529 %}
9526 9530 ins_pipe( pipe_slow );
9527 9531 %}
9528 9532
9529 9533 // float compare and set condition codes in EFLAGS by XMM regs
9530 9534 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9531 9535 predicate(UseSSE>=2);
9532 9536 match(Set cr (CmpD src1 (LoadD src2)));
9533 9537 ins_cost(145);
9534 9538 format %{ "UCOMISD $src1,$src2\n\t"
9535 9539 "JNP,s exit\n\t"
9536 9540 "PUSHF\t# saw NaN, set CF\n\t"
9537 9541 "AND [rsp], #0xffffff2b\n\t"
9538 9542 "POPF\n"
9539 9543 "exit:" %}
9540 9544 ins_encode %{
9541 9545 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9542 9546 emit_cmpfp_fixup(_masm);
9543 9547 %}
9544 9548 ins_pipe( pipe_slow );
9545 9549 %}
9546 9550
9547 9551 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9548 9552 predicate(UseSSE>=2);
9549 9553 match(Set cr (CmpD src1 (LoadD src2)));
9550 9554 ins_cost(100);
9551 9555 format %{ "UCOMISD $src1,$src2" %}
9552 9556 ins_encode %{
9553 9557 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9554 9558 %}
9555 9559 ins_pipe( pipe_slow );
9556 9560 %}
9557 9561
9558 9562 // Compare into -1,0,1 in XMM
9559 9563 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9560 9564 predicate(UseSSE>=2);
9561 9565 match(Set dst (CmpD3 src1 src2));
9562 9566 effect(KILL cr);
9563 9567 ins_cost(255);
9564 9568 format %{ "UCOMISD $src1, $src2\n\t"
9565 9569 "MOV $dst, #-1\n\t"
9566 9570 "JP,s done\n\t"
9567 9571 "JB,s done\n\t"
9568 9572 "SETNE $dst\n\t"
9569 9573 "MOVZB $dst, $dst\n"
9570 9574 "done:" %}
9571 9575 ins_encode %{
9572 9576 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9573 9577 emit_cmpfp3(_masm, $dst$$Register);
9574 9578 %}
9575 9579 ins_pipe( pipe_slow );
9576 9580 %}
9577 9581
9578 9582 // Compare into -1,0,1 in XMM and memory
9579 9583 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9580 9584 predicate(UseSSE>=2);
9581 9585 match(Set dst (CmpD3 src1 (LoadD src2)));
9582 9586 effect(KILL cr);
9583 9587 ins_cost(275);
9584 9588 format %{ "UCOMISD $src1, $src2\n\t"
9585 9589 "MOV $dst, #-1\n\t"
9586 9590 "JP,s done\n\t"
9587 9591 "JB,s done\n\t"
9588 9592 "SETNE $dst\n\t"
9589 9593 "MOVZB $dst, $dst\n"
9590 9594 "done:" %}
9591 9595 ins_encode %{
9592 9596 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9593 9597 emit_cmpfp3(_masm, $dst$$Register);
9594 9598 %}
9595 9599 ins_pipe( pipe_slow );
9596 9600 %}
9597 9601
9598 9602
// ---- x87 (UseSSE<=1) double subtract / abs / negate / add --------------
// Two-operand forms push the operand and combine-with-pop into the stack
// register holding dst (DE /digit encodings). "*_round" forms compute into
// ST and FSTP to a stack slot so the RoundDouble semantics (store-and-reload
// rounding to 64-bit) are honored. abs/neg operate in place on ST(0)
// (regDPR1), a single-byte D9-prefixed instruction.
9599 9603 instruct subDPR_reg(regDPR dst, regDPR src) %{
9600 9604 predicate (UseSSE <=1);
9601 9605 match(Set dst (SubD dst src));
9602 9606
9603 9607 format %{ "FLD $src\n\t"
9604 9608 "DSUBp $dst,ST" %}
9605 9609 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9606 9610 ins_cost(150);
9607 9611 ins_encode( Push_Reg_DPR(src),
9608 9612 OpcP, RegOpc(dst) );
9609 9613 ins_pipe( fpu_reg_reg );
9610 9614 %}
9611 9615
9612 9616 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9613 9617 predicate (UseSSE <=1);
9614 9618 match(Set dst (RoundDouble (SubD src1 src2)));
9615 9619 ins_cost(250);
9616 9620
9617 9621 format %{ "FLD $src2\n\t"
9618 9622 "DSUB ST,$src1\n\t"
9619 9623 "FSTP_D $dst\t# D-round" %}
9620 9624 opcode(0xD8, 0x5);
9621 9625 ins_encode( Push_Reg_DPR(src2),
9622 9626 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9623 9627 ins_pipe( fpu_mem_reg_reg );
9624 9628 %}
9625 9629
9626 9630
9627 9631 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9628 9632 predicate (UseSSE <=1);
9629 9633 match(Set dst (SubD dst (LoadD src)));
9630 9634 ins_cost(150);
9631 9635
9632 9636 format %{ "FLD $src\n\t"
9633 9637 "DSUBp $dst,ST" %}
9634 9638 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9635 9639 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9636 9640 OpcP, RegOpc(dst) );
9637 9641 ins_pipe( fpu_reg_mem );
9638 9642 %}
9639 9643
// FABS/FCHS act on ST(0) only, so both dst and src are pinned to regDPR1.
9640 9644 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9641 9645 predicate (UseSSE<=1);
9642 9646 match(Set dst (AbsD src));
9643 9647 ins_cost(100);
9644 9648 format %{ "FABS" %}
9645 9649 opcode(0xE1, 0xD9);
9646 9650 ins_encode( OpcS, OpcP );
9647 9651 ins_pipe( fpu_reg_reg );
9648 9652 %}
9649 9653
9650 9654 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9651 9655 predicate(UseSSE<=1);
9652 9656 match(Set dst (NegD src));
9653 9657 ins_cost(100);
9654 9658 format %{ "FCHS" %}
9655 9659 opcode(0xE0, 0xD9);
9656 9660 ins_encode( OpcS, OpcP );
9657 9661 ins_pipe( fpu_reg_reg );
9658 9662 %}
9659 9663
9660 9664 instruct addDPR_reg(regDPR dst, regDPR src) %{
9661 9665 predicate(UseSSE<=1);
9662 9666 match(Set dst (AddD dst src));
9663 9667 format %{ "FLD $src\n\t"
9664 9668 "DADD $dst,ST" %}
9665 9669 size(4);
9666 9670 ins_cost(150);
9667 9671 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9668 9672 ins_encode( Push_Reg_DPR(src),
9669 9673 OpcP, RegOpc(dst) );
9670 9674 ins_pipe( fpu_reg_reg );
9671 9675 %}
9672 9676
9673 9677
9674 9678 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9675 9679 predicate(UseSSE<=1);
9676 9680 match(Set dst (RoundDouble (AddD src1 src2)));
9677 9681 ins_cost(250);
9678 9682
9679 9683 format %{ "FLD $src2\n\t"
9680 9684 "DADD ST,$src1\n\t"
9681 9685 "FSTP_D $dst\t# D-round" %}
9682 9686 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9683 9687 ins_encode( Push_Reg_DPR(src2),
9684 9688 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9685 9689 ins_pipe( fpu_mem_reg_reg );
9686 9690 %}
9687 9691
9688 9692
9689 9693 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9690 9694 predicate(UseSSE<=1);
9691 9695 match(Set dst (AddD dst (LoadD src)));
9692 9696 ins_cost(150);
9693 9697
9694 9698 format %{ "FLD $src\n\t"
9695 9699 "DADDp $dst,ST" %}
9696 9700 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9697 9701 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9698 9702 OpcP, RegOpc(dst) );
9699 9703 ins_pipe( fpu_reg_mem );
9700 9704 %}
9701 9705
// Read-modify-write to memory; set_instruction_start re-syncs the emitter
// before the final store opcode bytes.
9702 9706 // add-to-memory
9703 9707 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9704 9708 predicate(UseSSE<=1);
9705 9709 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9706 9710 ins_cost(150);
9707 9711
9708 9712 format %{ "FLD_D $dst\n\t"
9709 9713 "DADD ST,$src\n\t"
9710 9714 "FST_D $dst" %}
9711 9715 opcode(0xDD, 0x0);
9712 9716 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9713 9717 Opcode(0xD8), RegOpc(src),
9714 9718 set_instruction_start,
9715 9719 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9716 9720 ins_pipe( fpu_reg_mem );
9717 9721 %}
9718 9722
// Constant 1.0 is loaded with the dedicated FLD1 instruction.
9719 9723 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9720 9724 predicate(UseSSE<=1);
9721 9725 match(Set dst (AddD dst con));
9722 9726 ins_cost(125);
9723 9727 format %{ "FLD1\n\t"
9724 9728 "DADDp $dst,ST" %}
9725 9729 ins_encode %{
9726 9730 __ fld1();
9727 9731 __ faddp($dst$$reg);
9728 9732 %}
9729 9733 ins_pipe(fpu_reg);
9730 9734 %}
9731 9735
// Predicate excludes 0.0/1.0 so the cheaper special-case rules match those.
9732 9736 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9733 9737 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9734 9738 match(Set dst (AddD dst con));
9735 9739 ins_cost(200);
9736 9740 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9737 9741 "DADDp $dst,ST" %}
9738 9742 ins_encode %{
9739 9743 __ fld_d($constantaddress($con));
9740 9744 __ faddp($dst$$reg);
9741 9745 %}
9742 9746 ins_pipe(fpu_reg_mem);
9743 9747 %}
9744 9748
9745 9749 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9746 9750 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9747 9751 match(Set dst (RoundDouble (AddD src con)));
9748 9752 ins_cost(200);
9749 9753 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9750 9754 "DADD ST,$src\n\t"
9751 9755 "FSTP_D $dst\t# D-round" %}
9752 9756 ins_encode %{
9753 9757 __ fld_d($constantaddress($con));
9754 9758 __ fadd($src$$reg);
9755 9759 __ fstp_d(Address(rsp, $dst$$disp));
9756 9760 %}
9757 9761 ins_pipe(fpu_mem_reg_con);
9758 9762 %}
9759 9763
// ---- x87 (UseSSE<=1) double multiply ------------------------------------
// Includes the strict-FP variant (pre/post bias multiplies to avoid double
// rounding of subnormals), constant and memory operand forms, and MACRO3
// fused add/sub-of-multiply patterns.
9760 9764 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9761 9765 predicate(UseSSE<=1);
9762 9766 match(Set dst (MulD dst src));
9763 9767 format %{ "FLD $src\n\t"
9764 9768 "DMULp $dst,ST" %}
9765 9769 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9766 9770 ins_cost(150);
9767 9771 ins_encode( Push_Reg_DPR(src),
9768 9772 OpcP, RegOpc(dst) );
9769 9773 ins_pipe( fpu_reg_reg );
9770 9774 %}
9771 9775
9772 9776 // Strict FP instruction biases argument before multiply then
9773 9777 // biases result to avoid double rounding of subnormals.
9774 9778 //
9775 9779 // scale arg1 by multiplying arg1 by 2^(-15360)
9776 9780 // load arg2
9777 9781 // multiply scaled arg1 by arg2
9778 9782 // rescale product by 2^(15360)
9779 9783 //
// ins_cost(1) deliberately undercuts every other MulD rule so this one is
// always selected for strict methods (gated by the predicate).
9780 9784 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9781 9785 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9782 9786 match(Set dst (MulD dst src));
9783 9787 ins_cost(1); // Select this instruction for all strict FP double multiplies
9784 9788
9785 9789 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
9786 9790 "DMULp $dst,ST\n\t"
9787 9791 "FLD $src\n\t"
9788 9792 "DMULp $dst,ST\n\t"
9789 9793 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
9790 9794 "DMULp $dst,ST\n\t" %}
9791 9795 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9792 9796 ins_encode( strictfp_bias1(dst),
9793 9797 Push_Reg_DPR(src),
9794 9798 OpcP, RegOpc(dst),
9795 9799 strictfp_bias2(dst) );
9796 9800 ins_pipe( fpu_reg_reg );
9797 9801 %}
9798 9802
// Predicate excludes 0.0/1.0 constants (handled by identities elsewhere).
9799 9803 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9800 9804 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9801 9805 match(Set dst (MulD dst con));
9802 9806 ins_cost(200);
9803 9807 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9804 9808 "DMULp $dst,ST" %}
9805 9809 ins_encode %{
9806 9810 __ fld_d($constantaddress($con));
9807 9811 __ fmulp($dst$$reg);
9808 9812 %}
9809 9813 ins_pipe(fpu_reg_mem);
9810 9814 %}
9811 9815
9812 9816
9813 9817 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9814 9818 predicate( UseSSE<=1 );
9815 9819 match(Set dst (MulD dst (LoadD src)));
9816 9820 ins_cost(200);
9817 9821 format %{ "FLD_D $src\n\t"
9818 9822 "DMULp $dst,ST" %}
9819 9823 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
9820 9824 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9821 9825 OpcP, RegOpc(dst) );
9822 9826 ins_pipe( fpu_reg_mem );
9823 9827 %}
9824 9828
9825 9829 //
9826 9830 // Cisc-alternate to reg-reg multiply
9827 9831 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9828 9832 predicate( UseSSE<=1 );
9829 9833 match(Set dst (MulD src (LoadD mem)));
9830 9834 ins_cost(250);
9831 9835 format %{ "FLD_D $mem\n\t"
9832 9836 "DMUL ST,$src\n\t"
9833 9837 "FSTP_D $dst" %}
9834 9838 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
9835 9839 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9836 9840 OpcReg_FPR(src),
9837 9841 Pop_Reg_DPR(dst) );
9838 9842 ins_pipe( fpu_reg_reg_mem );
9839 9843 %}
9840 9844
9841 9845
9842 9846 // MACRO3 -- addDPR a mulDPR
9843 9847 // This instruction is a '2-address' instruction in that the result goes
9844 9848 // back to src2. This eliminates a move from the macro; possibly the
9845 9849 // register allocator will have to add it back (and maybe not).
9846 9850 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9847 9851 predicate( UseSSE<=1 );
9848 9852 match(Set src2 (AddD (MulD src0 src1) src2));
9849 9853 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9850 9854 "DMUL ST,$src1\n\t"
9851 9855 "DADDp $src2,ST" %}
9852 9856 ins_cost(250);
9853 9857 opcode(0xDD); /* LoadD DD /0 */
9854 9858 ins_encode( Push_Reg_FPR(src0),
9855 9859 FMul_ST_reg(src1),
9856 9860 FAddP_reg_ST(src2) );
9857 9861 ins_pipe( fpu_reg_reg_reg );
9858 9862 %}
9859 9863
9860 9864
// FSUBRp is encoded directly as DE E0+i (Opc_plus adds the stack index).
9861 9865 // MACRO3 -- subDPR a mulDPR
9862 9866 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9863 9867 predicate( UseSSE<=1 );
9864 9868 match(Set src2 (SubD (MulD src0 src1) src2));
9865 9869 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9866 9870 "DMUL ST,$src1\n\t"
9867 9871 "DSUBRp $src2,ST" %}
9868 9872 ins_cost(250);
9869 9873 ins_encode( Push_Reg_FPR(src0),
9870 9874 FMul_ST_reg(src1),
9871 9875 Opcode(0xDE), Opc_plus(0xE0,src2));
9872 9876 ins_pipe( fpu_reg_reg_reg );
9873 9877 %}
9874 9878
9875 9879
// x87 double divide, dst /= src: push src, then FDIVp into dst (DE /7).
9876 9880 instruct divDPR_reg(regDPR dst, regDPR src) %{
9877 9881 predicate( UseSSE<=1 );
9878 9882 match(Set dst (DivD dst src));
9879 9883
9880 9884 format %{ "FLD $src\n\t"
9881 9885 "FDIVp $dst,ST" %}
9882 9886 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9883 9887 ins_cost(150);
9884 9888 ins_encode( Push_Reg_DPR(src),
9885 9889 OpcP, RegOpc(dst) );
9886 9890 ins_pipe( fpu_reg_reg );
9887 9891 %}
9888 9892
9889 9893 // Strict FP instruction biases argument before division then
9890 9894 // biases result, to avoid double rounding of subnormals.
9891 9895 //
9892 9896 // scale dividend by multiplying dividend by 2^(-15360)
9893 9897 // load divisor
9894 9898 // divide scaled dividend by divisor
9895 9899 // rescale quotient by 2^(15360)
9896 9900 //
// Strict-FP double divide: bias the dividend before dividing and re-bias
// the quotient, so subnormal results are not double-rounded (mirrors
// strictfp_mulDPR_reg above).
// Fixes vs. original: the redundant leading "predicate (UseSSE<=1);" clause
// is dropped — an instruct takes a single predicate, and the strict-mode
// predicate below already includes the UseSSE<=1 condition; also
// "ins_cost(01)" is normalized to ins_cost(1) with the same selection
// comment as the strict multiply rule.
9897 9901 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9899 9903 match(Set dst (DivD dst src));
9900 9904 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9901 9905 ins_cost(1); // Select this instruction for all strict FP double divides
9902 9906
9903 9907 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
9904 9908 "DMULp $dst,ST\n\t"
9905 9909 "FLD $src\n\t"
9906 9910 "FDIVp $dst,ST\n\t"
9907 9911 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
9908 9912 "DMULp $dst,ST\n\t" %}
9909 9913 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9910 9914 ins_encode( strictfp_bias1(dst),
9911 9915 Push_Reg_DPR(src),
9912 9916 OpcP, RegOpc(dst),
9913 9917 strictfp_bias2(dst) );
9914 9918 ins_pipe( fpu_reg_reg );
9915 9919 %}
9916 9920
// Divide with RoundDouble: result is FSTP'd to a stack slot to force
// 64-bit rounding. Disabled for strict methods (the biased rule handles
// those), hence the negated strict-mode predicate.
9917 9921 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9918 9922 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9919 9923 match(Set dst (RoundDouble (DivD src1 src2)));
9920 9924
9921 9925 format %{ "FLD $src1\n\t"
9922 9926 "FDIV ST,$src2\n\t"
9923 9927 "FSTP_D $dst\t# D-round" %}
9924 9928 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9925 9929 ins_encode( Push_Reg_DPR(src1),
9926 9930 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9927 9931 ins_pipe( fpu_mem_reg_reg );
9928 9932 %}
9929 9933
9930 9934
// ---- Double remainder (ModD) --------------------------------------------
// Both variants loop on x87 FPREM until the C2 status bit clears (partial
// remainder done), checking the status word through AX — hence KILL rax/cr.
// The SSE2 variant shuttles the XMM operands through the stack to reach the
// x87 unit and restores the FPU stack afterwards.
9931 9935 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9932 9936 predicate(UseSSE<=1);
9933 9937 match(Set dst (ModD dst src));
9934 9938 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9935 9939
9936 9940 format %{ "DMOD $dst,$src" %}
9937 9941 ins_cost(250);
9938 9942 ins_encode(Push_Reg_Mod_DPR(dst, src),
9939 9943 emitModDPR(),
9940 9944 Push_Result_Mod_DPR(src),
9941 9945 Pop_Reg_DPR(dst));
9942 9946 ins_pipe( pipe_slow );
9943 9947 %}
9944 9948
9945 9949 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9946 9950 predicate(UseSSE>=2);
9947 9951 match(Set dst (ModD src0 src1));
9948 9952 effect(KILL rax, KILL cr);
9949 9953
9950 9954 format %{ "SUB ESP,8\t # DMOD\n"
9951 9955 "\tMOVSD [ESP+0],$src1\n"
9952 9956 "\tFLD_D [ESP+0]\n"
9953 9957 "\tMOVSD [ESP+0],$src0\n"
9954 9958 "\tFLD_D [ESP+0]\n"
9955 9959 "loop:\tFPREM\n"
9956 9960 "\tFWAIT\n"
9957 9961 "\tFNSTSW AX\n"
9958 9962 "\tSAHF\n"
9959 9963 "\tJP loop\n"
9960 9964 "\tFSTP_D [ESP+0]\n"
9961 9965 "\tMOVSD $dst,[ESP+0]\n"
9962 9966 "\tADD ESP,8\n"
9963 9967 "\tFSTP ST0\t # Restore FPU Stack"
9964 9968 %}
9965 9969 ins_cost(250);
9966 9970 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9967 9971 ins_pipe( pipe_slow );
9968 9972 %}
9969 9973
// ---- Transcendentals (sin/cos/tan/atan/sqrt/pow) ------------------------
// x87-only instructions. The *_DPR forms operate in place on ST(0)
// (FSIN D9 FE, FCOS D9 FF, FPTAN D9 F2 + FSTP to drop the pushed 1.0,
// FPATAN D9 F3). The SSE2 (regD) forms bounce the XMM value through the
// stack via Push_SrcD/Push_ResultD, which adjust ESP — hence KILL cr.
9970 9974 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9971 9975 predicate (UseSSE<=1);
9972 9976 match(Set dst (SinD src));
9973 9977 ins_cost(1800);
9974 9978 format %{ "DSIN $dst" %}
9975 9979 opcode(0xD9, 0xFE);
9976 9980 ins_encode( OpcP, OpcS );
9977 9981 ins_pipe( pipe_slow );
9978 9982 %}
9979 9983
9980 9984 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9981 9985 predicate (UseSSE>=2);
9982 9986 match(Set dst (SinD dst));
9983 9987 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9984 9988 ins_cost(1800);
9985 9989 format %{ "DSIN $dst" %}
9986 9990 opcode(0xD9, 0xFE);
9987 9991 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9988 9992 ins_pipe( pipe_slow );
9989 9993 %}
9990 9994
9991 9995 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9992 9996 predicate (UseSSE<=1);
9993 9997 match(Set dst (CosD src));
9994 9998 ins_cost(1800);
9995 9999 format %{ "DCOS $dst" %}
9996 10000 opcode(0xD9, 0xFF);
9997 10001 ins_encode( OpcP, OpcS );
9998 10002 ins_pipe( pipe_slow );
9999 10003 %}
10000 10004
10001 10005 instruct cosD_reg(regD dst, eFlagsReg cr) %{
10002 10006 predicate (UseSSE>=2);
10003 10007 match(Set dst (CosD dst));
10004 10008 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10005 10009 ins_cost(1800);
10006 10010 format %{ "DCOS $dst" %}
10007 10011 opcode(0xD9, 0xFF);
10008 10012 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10009 10013 ins_pipe( pipe_slow );
10010 10014 %}
10011 10015
10012 10016 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
10013 10017 predicate (UseSSE<=1);
10014 10018 match(Set dst(TanD src));
10015 10019 format %{ "DTAN $dst" %}
10016 10020 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10017 10021 Opcode(0xDD), Opcode(0xD8)); // fstp st
10018 10022 ins_pipe( pipe_slow );
10019 10023 %}
10020 10024
10021 10025 instruct tanD_reg(regD dst, eFlagsReg cr) %{
10022 10026 predicate (UseSSE>=2);
10023 10027 match(Set dst(TanD dst));
10024 10028 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10025 10029 format %{ "DTAN $dst" %}
10026 10030 ins_encode( Push_SrcD(dst),
10027 10031 Opcode(0xD9), Opcode(0xF2), // fptan
10028 10032 Opcode(0xDD), Opcode(0xD8), // fstp st
10029 10033 Push_ResultD(dst) );
10030 10034 ins_pipe( pipe_slow );
10031 10035 %}
10032 10036
// NOTE(review): the "DATA" mnemonic in the two atan formats below looks like
// a longstanding typo for "DATAN" (FPATAN) — format-string only, cosmetic.
10033 10037 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10034 10038 predicate (UseSSE<=1);
10035 10039 match(Set dst(AtanD dst src));
10036 10040 format %{ "DATA $dst,$src" %}
10037 10041 opcode(0xD9, 0xF3);
10038 10042 ins_encode( Push_Reg_DPR(src),
10039 10043 OpcP, OpcS, RegOpc(dst) );
10040 10044 ins_pipe( pipe_slow );
10041 10045 %}
10042 10046
10043 10047 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10044 10048 predicate (UseSSE>=2);
10045 10049 match(Set dst(AtanD dst src));
10046 10050 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10047 10051 format %{ "DATA $dst,$src" %}
10048 10052 opcode(0xD9, 0xF3);
10049 10053 ins_encode( Push_SrcD(src),
10050 10054 OpcP, OpcS, Push_ResultD(dst) );
10051 10055 ins_pipe( pipe_slow );
10052 10056 %}
10053 10057
// FSQRT is D9 FA; opcode bytes are listed swapped and emitted OpcS, OpcP.
10054 10058 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10055 10059 predicate (UseSSE<=1);
10056 10060 match(Set dst (SqrtD src));
10057 10061 format %{ "DSQRT $dst,$src" %}
10058 10062 opcode(0xFA, 0xD9);
10059 10063 ins_encode( Push_Reg_DPR(src),
10060 10064 OpcS, OpcP, Pop_Reg_DPR(dst) );
10061 10065 ins_pipe( pipe_slow );
10062 10066 %}
10063 10067
// Pow destroys Y in place; fast_pow() (macro assembler) clobbers EAX/ECX/EDX
// and flags, and needs 8 bytes of scratch stack.
10064 10068 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10065 10069 predicate (UseSSE<=1);
10066 10070 match(Set Y (PowD X Y)); // Raise X to the Yth power
10067 10071 effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
10068 10072 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %}
10069 10073 ins_encode %{
10070 10074 __ subptr(rsp, 8);
10071 10075 __ fld_s($X$$reg - 1);
10072 10076 __ fast_pow();
10073 10077 __ addptr(rsp, 8);
10074 10078 %}
10075 10079 ins_pipe( pipe_slow );
10076 10080 %}
10077 10081
10078 10082 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10079 10083 predicate (UseSSE>=2);
10080 10084 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10081 10085 effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
10082 10086 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
10083 10087 ins_encode %{
10084 10088 __ subptr(rsp, 8);
10085 10089 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
10086 10090 __ fld_d(Address(rsp, 0));
10087 10091 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
10088 10092 __ fld_d(Address(rsp, 0));
10089 10093 __ fast_pow();
10090 10094 __ fstp_d(Address(rsp, 0));
10091 10095 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
10092 10096 __ addptr(rsp, 8);
10093 10097 %}
10094 10098 ins_pipe( pipe_slow );
10095 10099 %}
10096 10100
10097 10101
// x87-only ExpD: value is already in ST(0) (regDPR1), computed in place.
// fast_exp() clobbers EAX/ECX/EDX and EFLAGS, hence the KILL effects.
10098 10102 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10099 10103 predicate (UseSSE<=1);
10100 10104 match(Set dpr1 (ExpD dpr1));
10101 10105 effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
10102 10106 format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %}
10103 10107 ins_encode %{
10104 10108 __ fast_exp();
10105 10109 %}
10106 10110 ins_pipe( pipe_slow );
10107 10111 %}
10108 10112
// XMM ExpD: computes exp($src) into $dst via the x87 fast_exp() helper.
// fast_exp() clobbers EAX/ECX/EDX and EFLAGS, hence the KILL effects.
10109 10113 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10110 10114 predicate (UseSSE>=2);
10111 10115 match(Set dst (ExpD src));
10112 10116 effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
// FIX: debug format previously read "$dst -> $src", with the operands
// reversed; the dataflow is src -> dst (compare powD_reg's
// "fast_pow $src0 $src1 -> $dst").  Format strings only affect -XX:+PrintOptoAssembly
// output, so this is a disassembly-text fix with no codegen change.
10113 10117 format %{ "fast_exp $src -> $dst // KILL $rax, $rcx, $rdx" %}
10114 10118 ins_encode %{
// Spill the XMM source to a scratch stack slot, load it onto the x87
// stack, run fast_exp(), then store the x87 result and reload it into
// the XMM destination.
10115 10119 __ subptr(rsp, 8);
10116 10120 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10117 10121 __ fld_d(Address(rsp, 0));
10118 10122 __ fast_exp();
10119 10123 __ fstp_d(Address(rsp, 0));
10120 10124 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
10121 10125 __ addptr(rsp, 8);
10122 10126 %}
10123 10127 ins_pipe( pipe_slow );
10124 10128 %}
10125 10129
// --- Logarithms via FYL2X: log_b(x) = log_b(2) * log_2(x). ---
// The x87 forms assume the argument is already in ST(0); the XMM forms load
// the constant first and then Push_SrcD puts x on top, giving the
// ST(1)=log_b(2), ST(0)=x layout FYL2X requires (it computes ST(1)*log2(ST(0))).
10126 10130 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10127 10131 predicate (UseSSE<=1);
10128 10132 // The source Double operand on FPU stack
10129 10133 match(Set dst (Log10D src));
10130 10134 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10131 10135 // fxch ; swap ST(0) with ST(1)
10132 10136 // fyl2x ; compute log_10(2) * log_2(x)
10133 10137 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10134 10138 "FXCH \n\t"
10135 10139 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10136 10140 %}
10137 10141 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10138 10142 Opcode(0xD9), Opcode(0xC9), // fxch
10139 10143 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10140 10144
10141 10145 ins_pipe( pipe_slow );
10142 10146 %}
10143 10147
10144 10148 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
10145 10149 predicate (UseSSE>=2);
10146 10150 effect(KILL cr);
10147 10151 match(Set dst (Log10D src));
10148 10152 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10149 10153 // fyl2x ; compute log_10(2) * log_2(x)
10150 10154 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10151 10155 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10152 10156 %}
// No FXCH needed here: Push_SrcD loads x above the constant, which is
// already the operand order FYL2X wants.
10153 10157 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10154 10158 Push_SrcD(src),
10155 10159 Opcode(0xD9), Opcode(0xF1), // fyl2x
10156 10160 Push_ResultD(dst));
10157 10161
10158 10162 ins_pipe( pipe_slow );
10159 10163 %}
10160 10164
10161 10165 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
10162 10166 predicate (UseSSE<=1);
10163 10167 // The source Double operand on FPU stack
10164 10168 match(Set dst (LogD src));
10165 10169 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10166 10170 // fxch ; swap ST(0) with ST(1)
10167 10171 // fyl2x ; compute log_e(2) * log_2(x)
10168 10172 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10169 10173 "FXCH \n\t"
10170 10174 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10171 10175 %}
10172 10176 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10173 10177 Opcode(0xD9), Opcode(0xC9), // fxch
10174 10178 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10175 10179
10176 10180 ins_pipe( pipe_slow );
10177 10181 %}
10178 10182
10179 10183 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
10180 10184 predicate (UseSSE>=2);
10181 10185 effect(KILL cr);
10182 10186 // The source and result Double operands in XMM registers
10183 10187 match(Set dst (LogD src));
10184 10188 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10185 10189 // fyl2x ; compute log_e(2) * log_2(x)
10186 10190 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10187 10191 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10188 10192 %}
10189 10193 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10190 10194 Push_SrcD(src),
10191 10195 Opcode(0xD9), Opcode(0xF1), // fyl2x
10192 10196 Push_ResultD(dst));
10193 10197 ins_pipe( pipe_slow );
10194 10198 %}
10195 10199
10196 10200 //-------------Float Instructions-------------------------------
10197 10201 // Float Math
10198 10202
10199 10203 // Code for float compare:
10200 10204 // fcompp();
10201 10205 // fwait(); fnstsw_ax();
10202 10206 // sahf();
10203 10207 // movl(dst, unordered_result);
10204 10208 // jcc(Assembler::parity, exit);
10205 10209 // movl(dst, less_result);
10206 10210 // jcc(Assembler::below, exit);
10207 10211 // movl(dst, equal_result);
10208 10212 // jcc(Assembler::equal, exit);
10209 10213 // movl(dst, greater_result);
10210 10214 // exit:
10211 10215
10212 10216 // P6 version of float compare, sets condition codes in EFLAGS
// Unordered (NaN) results are forced to "less than" by setting CF via AH/SAHF.
// NOTE(review): Push_Reg_DPR is used here on float (regFPR) operands --
// presumably safe because the stack-register FLD ST(i) encoding is
// width-independent; confirm against the DPR/FPR encoding definitions.
10213 10217 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10214 10218 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10215 10219 match(Set cr (CmpF src1 src2));
10216 10220 effect(KILL rax);
10217 10221 ins_cost(150);
10218 10222 format %{ "FLD $src1\n\t"
10219 10223 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10220 10224 "JNP exit\n\t"
10221 10225 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
10222 10226 "SAHF\n"
10223 10227 "exit:\tNOP // avoid branch to branch" %}
10224 10228 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10225 10229 ins_encode( Push_Reg_DPR(src1),
10226 10230 OpcP, RegOpc(src2),
10227 10231 cmpF_P6_fixup );
10228 10232 ins_pipe( pipe_slow );
10229 10233 %}
10230 10234
// CF-only consumer variant: no NaN fixup needed, so no scratch EAX kill.
10231 10235 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10232 10236 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10233 10237 match(Set cr (CmpF src1 src2));
10234 10238 ins_cost(100);
10235 10239 format %{ "FLD $src1\n\t"
10236 10240 "FUCOMIP ST,$src2 // P6 instruction" %}
10237 10241 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10238 10242 ins_encode( Push_Reg_DPR(src1),
10239 10243 OpcP, RegOpc(src2));
10240 10244 ins_pipe( pipe_slow );
10241 10245 %}
10242 10246
10243 10247
10244 10248 // Compare & branch
// Pre-P6 path: FNSTSW AX / SAHF to move FPU status into EFLAGS.
10245 10249 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10246 10250 predicate(UseSSE == 0);
10247 10251 match(Set cr (CmpF src1 src2));
10248 10252 effect(KILL rax);
10249 10253 ins_cost(200);
10250 10254 format %{ "FLD $src1\n\t"
10251 10255 "FCOMp $src2\n\t"
10252 10256 "FNSTSW AX\n\t"
10253 10257 "TEST AX,0x400\n\t"
10254 10258 "JZ,s flags\n\t"
10255 10259 "MOV AH,1\t# unordered treat as LT\n"
10256 10260 "flags:\tSAHF" %}
10257 10261 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10258 10262 ins_encode( Push_Reg_DPR(src1),
10259 10263 OpcP, RegOpc(src2),
10260 10264 fpu_flags);
10261 10265 ins_pipe( pipe_slow );
10262 10266 %}
10263 10267
10264 10268 // Compare vs zero into -1,0,1
// Uses FTST (D9 E4, bytes emitted OpcS,OpcP order) against +0.0.
10265 10269 instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10266 10270 predicate(UseSSE == 0);
10267 10271 match(Set dst (CmpF3 src1 zero));
10268 10272 effect(KILL cr, KILL rax);
10269 10273 ins_cost(280);
10270 10274 format %{ "FTSTF $dst,$src1" %}
10271 10275 opcode(0xE4, 0xD9);
10272 10276 ins_encode( Push_Reg_DPR(src1),
10273 10277 OpcS, OpcP, PopFPU,
10274 10278 CmpF_Result(dst));
10275 10279 ins_pipe( pipe_slow );
10276 10280 %}
10277 10281
10278 10282 // Compare into -1,0,1
10279 10283 instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10280 10284 predicate(UseSSE == 0);
10281 10285 match(Set dst (CmpF3 src1 src2));
10282 10286 effect(KILL cr, KILL rax);
10283 10287 ins_cost(300);
10284 10288 format %{ "FCMPF $dst,$src1,$src2" %}
10285 10289 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10286 10290 ins_encode( Push_Reg_DPR(src1),
10287 10291 OpcP, RegOpc(src2),
10288 10292 CmpF_Result(dst));
10289 10293 ins_pipe( pipe_slow );
10290 10294 %}
10291 10295
10292 10296 // float compare and set condition codes in EFLAGS by XMM regs
// UCOMISS sets PF on unordered; emit_cmpfp_fixup() rewrites the flags so a
// NaN operand reads as "below" (CF set), matching the x87 compare semantics.
10293 10297 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10294 10298 predicate(UseSSE>=1);
10295 10299 match(Set cr (CmpF src1 src2));
10296 10300 ins_cost(145);
10297 10301 format %{ "UCOMISS $src1,$src2\n\t"
10298 10302 "JNP,s exit\n\t"
10299 10303 "PUSHF\t# saw NaN, set CF\n\t"
10300 10304 "AND [rsp], #0xffffff2b\n\t"
10301 10305 "POPF\n"
10302 10306 "exit:" %}
10303 10307 ins_encode %{
10304 10308 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10305 10309 emit_cmpfp_fixup(_masm);
10306 10310 %}
10307 10311 ins_pipe( pipe_slow );
10308 10312 %}
10309 10313
// CF-only consumer variant: raw UCOMISS, no NaN fixup required.
10310 10314 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10311 10315 predicate(UseSSE>=1);
10312 10316 match(Set cr (CmpF src1 src2));
10313 10317 ins_cost(100);
10314 10318 format %{ "UCOMISS $src1,$src2" %}
10315 10319 ins_encode %{
10316 10320 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10317 10321 %}
10318 10322 ins_pipe( pipe_slow );
10319 10323 %}
10320 10324
10321 10325 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand (cisc) variant of cmpF_cc.
10322 10326 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10323 10327 predicate(UseSSE>=1);
10324 10328 match(Set cr (CmpF src1 (LoadF src2)));
10325 10329 ins_cost(165);
10326 10330 format %{ "UCOMISS $src1,$src2\n\t"
10327 10331 "JNP,s exit\n\t"
10328 10332 "PUSHF\t# saw NaN, set CF\n\t"
10329 10333 "AND [rsp], #0xffffff2b\n\t"
10330 10334 "POPF\n"
10331 10335 "exit:" %}
10332 10336 ins_encode %{
10333 10337 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10334 10338 emit_cmpfp_fixup(_masm);
10335 10339 %}
10336 10340 ins_pipe( pipe_slow );
10337 10341 %}
10338 10342
10339 10343 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10340 10344 predicate(UseSSE>=1);
10341 10345 match(Set cr (CmpF src1 (LoadF src2)));
10342 10346 ins_cost(100);
10343 10347 format %{ "UCOMISS $src1,$src2" %}
10344 10348 ins_encode %{
10345 10349 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10346 10350 %}
10347 10351 ins_pipe( pipe_slow );
10348 10352 %}
10349 10353
10350 10354 // Compare into -1,0,1 in XMM
// emit_cmpfp3() materializes the three-way result; NaN falls out as -1.
10351 10355 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10352 10356 predicate(UseSSE>=1);
10353 10357 match(Set dst (CmpF3 src1 src2));
10354 10358 effect(KILL cr);
10355 10359 ins_cost(255);
10356 10360 format %{ "UCOMISS $src1, $src2\n\t"
10357 10361 "MOV $dst, #-1\n\t"
10358 10362 "JP,s done\n\t"
10359 10363 "JB,s done\n\t"
10360 10364 "SETNE $dst\n\t"
10361 10365 "MOVZB $dst, $dst\n"
10362 10366 "done:" %}
10363 10367 ins_encode %{
10364 10368 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10365 10369 emit_cmpfp3(_masm, $dst$$Register);
10366 10370 %}
10367 10371 ins_pipe( pipe_slow );
10368 10372 %}
10369 10373
10370 10374 // Compare into -1,0,1 in XMM and memory
10371 10375 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10372 10376 predicate(UseSSE>=1);
10373 10377 match(Set dst (CmpF3 src1 (LoadF src2)));
10374 10378 effect(KILL cr);
10375 10379 ins_cost(275);
10376 10380 format %{ "UCOMISS $src1, $src2\n\t"
10377 10381 "MOV $dst, #-1\n\t"
10378 10382 "JP,s done\n\t"
10379 10383 "JB,s done\n\t"
10380 10384 "SETNE $dst\n\t"
10381 10385 "MOVZB $dst, $dst\n"
10382 10386 "done:" %}
10383 10387 ins_encode %{
10384 10388 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10385 10389 emit_cmpfp3(_masm, $dst$$Register);
10386 10390 %}
10387 10391 ins_pipe( pipe_slow );
10388 10392 %}
10389 10393
// --- x87 float add/sub/abs/neg ---
// The "24" variants honor strictfp-style 24-bit rounding by storing the result
// through a single-precision stack slot (Pop_Mem_FPR); the plain variants keep
// the result in an 80-bit x87 register and skip the rounding store.
10390 10394 // Spill to obtain 24-bit precision
10391 10395 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10392 10396 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10393 10397 match(Set dst (SubF src1 src2));
10394 10398
10395 10399 format %{ "FSUB $dst,$src1 - $src2" %}
10396 10400 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10397 10401 ins_encode( Push_Reg_FPR(src1),
10398 10402 OpcReg_FPR(src2),
10399 10403 Pop_Mem_FPR(dst) );
10400 10404 ins_pipe( fpu_mem_reg_reg );
10401 10405 %}
10402 10406 //
10403 10407 // This instruction does not round to 24-bits
10404 10408 instruct subFPR_reg(regFPR dst, regFPR src) %{
10405 10409 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10406 10410 match(Set dst (SubF dst src));
10407 10411
10408 10412 format %{ "FSUB $dst,$src" %}
10409 10413 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10410 10414 ins_encode( Push_Reg_FPR(src),
10411 10415 OpcP, RegOpc(dst) );
10412 10416 ins_pipe( fpu_reg_reg );
10413 10417 %}
10414 10418
10415 10419 // Spill to obtain 24-bit precision
10416 10420 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10417 10421 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10418 10422 match(Set dst (AddF src1 src2));
10419 10423
10420 10424 format %{ "FADD $dst,$src1,$src2" %}
10421 10425 opcode(0xD8, 0x0); /* D8 C0+i */
10422 10426 ins_encode( Push_Reg_FPR(src2),
10423 10427 OpcReg_FPR(src1),
10424 10428 Pop_Mem_FPR(dst) );
10425 10429 ins_pipe( fpu_mem_reg_reg );
10426 10430 %}
10427 10431 //
10428 10432 // This instruction does not round to 24-bits
10429 10433 instruct addFPR_reg(regFPR dst, regFPR src) %{
10430 10434 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10431 10435 match(Set dst (AddF dst src));
10432 10436
10433 10437 format %{ "FLD $src\n\t"
10434 10438 "FADDp $dst,ST" %}
10435 10439 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10436 10440 ins_encode( Push_Reg_FPR(src),
10437 10441 OpcP, RegOpc(dst) );
10438 10442 ins_pipe( fpu_reg_reg );
10439 10443 %}
10440 10444
// FABS/FCHS operate in place on ST(0); no rounding store is required since
// they cannot change the magnitude class of a value already in range.
10441 10445 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10442 10446 predicate(UseSSE==0);
10443 10447 match(Set dst (AbsF src));
10444 10448 ins_cost(100);
10445 10449 format %{ "FABS" %}
10446 10450 opcode(0xE1, 0xD9);
10447 10451 ins_encode( OpcS, OpcP );
10448 10452 ins_pipe( fpu_reg_reg );
10449 10453 %}
10450 10454
10451 10455 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10452 10456 predicate(UseSSE==0);
10453 10457 match(Set dst (NegF src));
10454 10458 ins_cost(100);
10455 10459 format %{ "FCHS" %}
10456 10460 opcode(0xE0, 0xD9);
10457 10461 ins_encode( OpcS, OpcP );
10458 10462 ins_pipe( fpu_reg_reg );
10459 10463 %}
10460 10464
// --- AddF with memory / immediate operands ---
// Cisc-spill alternates let the matcher fold loads into the FP add; the
// tertiary opcode byte (0xD9 /0) is the FLD used to bring memory operands in.
10461 10465 // Cisc-alternate to addFPR_reg
10462 10466 // Spill to obtain 24-bit precision
10463 10467 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10464 10468 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10465 10469 match(Set dst (AddF src1 (LoadF src2)));
10466 10470
10467 10471 format %{ "FLD $src2\n\t"
10468 10472 "FADD ST,$src1\n\t"
10469 10473 "FSTP_S $dst" %}
10470 10474 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10471 10475 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10472 10476 OpcReg_FPR(src1),
10473 10477 Pop_Mem_FPR(dst) );
10474 10478 ins_pipe( fpu_mem_reg_mem );
10475 10479 %}
10476 10480 //
10477 10481 // Cisc-alternate to addFPR_reg
10478 10482 // This instruction does not round to 24-bits
10479 10483 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10480 10484 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10481 10485 match(Set dst (AddF dst (LoadF src)));
10482 10486
10483 10487 format %{ "FADD $dst,$src" %}
10484 10488 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
10485 10489 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10486 10490 OpcP, RegOpc(dst) );
10487 10491 ins_pipe( fpu_reg_mem );
10488 10492 %}
10489 10493
10490 10494 // // Following two instructions for _222_mpegaudio
10491 10495 // Spill to obtain 24-bit precision
10492 10496 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10493 10497 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10494 10498 match(Set dst (AddF src1 src2));
10495 10499
10496 10500 format %{ "FADD $dst,$src1,$src2" %}
10497 10501 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10498 10502 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10499 10503 OpcReg_FPR(src2),
10500 10504 Pop_Mem_FPR(dst) );
10501 10505 ins_pipe( fpu_mem_reg_mem );
10502 10506 %}
10503 10507
10504 10508 // Cisc-spill variant
10505 10509 // Spill to obtain 24-bit precision
10506 10510 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10507 10511 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10508 10512 match(Set dst (AddF src1 (LoadF src2)));
10509 10513
10510 10514 format %{ "FADD $dst,$src1,$src2 cisc" %}
10511 10515 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10512 10516 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10513 10517 set_instruction_start,
10514 10518 OpcP, RMopc_Mem(secondary,src1),
10515 10519 Pop_Mem_FPR(dst) );
10516 10520 ins_pipe( fpu_mem_mem_mem );
10517 10521 %}
10518 10522
10519 10523 // Spill to obtain 24-bit precision
10520 10524 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10521 10525 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10522 10526 match(Set dst (AddF src1 src2));
10523 10527
10524 10528 format %{ "FADD $dst,$src1,$src2" %}
10525 10529 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
10526 10530 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10527 10531 set_instruction_start,
10528 10532 OpcP, RMopc_Mem(secondary,src1),
10529 10533 Pop_Mem_FPR(dst) );
10530 10534 ins_pipe( fpu_mem_mem_mem );
10531 10535 %}
10532 10536
10533 10537
10534 10538 // Spill to obtain 24-bit precision
// Immediate operands are materialized via the constant table ($constantaddress).
10535 10539 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10536 10540 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10537 10541 match(Set dst (AddF src con));
10538 10542 format %{ "FLD $src\n\t"
10539 10543 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10540 10544 "FSTP_S $dst" %}
10541 10545 ins_encode %{
10542 10546 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10543 10547 __ fadd_s($constantaddress($con));
10544 10548 __ fstp_s(Address(rsp, $dst$$disp));
10545 10549 %}
10546 10550 ins_pipe(fpu_mem_reg_con);
10547 10551 %}
10548 10552 //
10549 10553 // This instruction does not round to 24-bits
10550 10554 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10551 10555 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10552 10556 match(Set dst (AddF src con));
10553 10557 format %{ "FLD $src\n\t"
10554 10558 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10555 10559 "FSTP $dst" %}
10556 10560 ins_encode %{
10557 10561 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10558 10562 __ fadd_s($constantaddress($con));
// fstp_d(i) here is FSTP ST(i): pop the 80-bit result into the dst stack reg
// (no 24-bit rounding store in this variant).
10559 10563 __ fstp_d($dst$$reg);
10560 10564 %}
10561 10565 ins_pipe(fpu_reg_reg_con);
10562 10566 %}
10563 10567
// --- MulF variants: same 24-bit-rounding / no-rounding split as AddF above. ---
10564 10568 // Spill to obtain 24-bit precision
10565 10569 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10566 10570 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10567 10571 match(Set dst (MulF src1 src2));
10568 10572
10569 10573 format %{ "FLD $src1\n\t"
10570 10574 "FMUL $src2\n\t"
10571 10575 "FSTP_S $dst" %}
10572 10576 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10573 10577 ins_encode( Push_Reg_FPR(src1),
10574 10578 OpcReg_FPR(src2),
10575 10579 Pop_Mem_FPR(dst) );
10576 10580 ins_pipe( fpu_mem_reg_reg );
10577 10581 %}
10578 10582 //
10579 10583 // This instruction does not round to 24-bits
10580 10584 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10581 10585 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10582 10586 match(Set dst (MulF src1 src2));
10583 10587
10584 10588 format %{ "FLD $src1\n\t"
10585 10589 "FMUL $src2\n\t"
10586 10590 "FSTP_S $dst" %}
10587 10591 opcode(0xD8, 0x1); /* D8 C8+i */
10588 10592 ins_encode( Push_Reg_FPR(src2),
10589 10593 OpcReg_FPR(src1),
10590 10594 Pop_Reg_FPR(dst) );
10591 10595 ins_pipe( fpu_reg_reg_reg );
10592 10596 %}
10593 10597
10594 10598
10595 10599 // Spill to obtain 24-bit precision
10596 10600 // Cisc-alternate to reg-reg multiply
10597 10601 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10598 10602 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10599 10603 match(Set dst (MulF src1 (LoadF src2)));
10600 10604
10601 10605 format %{ "FLD_S $src2\n\t"
10602 10606 "FMUL $src1\n\t"
10603 10607 "FSTP_S $dst" %}
10604 10608 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
10605 10609 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10606 10610 OpcReg_FPR(src1),
10607 10611 Pop_Mem_FPR(dst) );
10608 10612 ins_pipe( fpu_mem_reg_mem );
10609 10613 %}
10610 10614 //
10611 10615 // This instruction does not round to 24-bits
10612 10616 // Cisc-alternate to reg-reg multiply
10613 10617 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10614 10618 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10615 10619 match(Set dst (MulF src1 (LoadF src2)));
10616 10620
10617 10621 format %{ "FMUL $dst,$src1,$src2" %}
10618 10622 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
10619 10623 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10620 10624 OpcReg_FPR(src1),
10621 10625 Pop_Reg_FPR(dst) );
10622 10626 ins_pipe( fpu_reg_reg_mem );
10623 10627 %}
10624 10628
10625 10629 // Spill to obtain 24-bit precision
10626 10630 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10627 10631 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10628 10632 match(Set dst (MulF src1 src2));
10629 10633
10630 10634 format %{ "FMUL $dst,$src1,$src2" %}
10631 10635 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
10632 10636 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10633 10637 set_instruction_start,
10634 10638 OpcP, RMopc_Mem(secondary,src1),
10635 10639 Pop_Mem_FPR(dst) );
10636 10640 ins_pipe( fpu_mem_mem_mem );
10637 10641 %}
10638 10642
10639 10643 // Spill to obtain 24-bit precision
// Immediate multiplicand comes from the constant table, as in addFPR*_reg_imm.
10640 10644 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10641 10645 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10642 10646 match(Set dst (MulF src con));
10643 10647
10644 10648 format %{ "FLD $src\n\t"
10645 10649 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10646 10650 "FSTP_S $dst" %}
10647 10651 ins_encode %{
10648 10652 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10649 10653 __ fmul_s($constantaddress($con));
10650 10654 __ fstp_s(Address(rsp, $dst$$disp));
10651 10655 %}
10652 10656 ins_pipe(fpu_mem_reg_con);
10653 10657 %}
10654 10658 //
10655 10659 // This instruction does not round to 24-bits
10656 10660 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10657 10661 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10658 10662 match(Set dst (MulF src con));
10659 10663
10660 10664 format %{ "FLD $src\n\t"
10661 10665 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10662 10666 "FSTP $dst" %}
10663 10667 ins_encode %{
10664 10668 __ fld_s($src$$reg - 1); // FLD ST(i-1)
10665 10669 __ fmul_s($constantaddress($con));
// fstp_d(i) is FSTP ST(i): keep the full 80-bit result in the dst stack reg.
10666 10670 __ fstp_d($dst$$reg);
10667 10671 %}
10668 10672 ins_pipe(fpu_reg_reg_con);
10669 10673 %}
10670 10674
10671 10675
10672 10676 //
10673 10677 // MACRO1 -- subsume unshared load into mulFPR
10674 10678 // This instruction does not round to 24-bits
// Fused patterns (MACRO1-3): the matcher folds a load and/or an add into the
// multiply when the intermediate value has no other users.
10675 10679 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10676 10680 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10677 10681 match(Set dst (MulF (LoadF mem1) src));
10678 10682
10679 10683 format %{ "FLD $mem1 ===MACRO1===\n\t"
10680 10684 "FMUL ST,$src\n\t"
10681 10685 "FSTP $dst" %}
10682 10686 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
10683 10687 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10684 10688 OpcReg_FPR(src),
10685 10689 Pop_Reg_FPR(dst) );
10686 10690 ins_pipe( fpu_reg_reg_mem );
10687 10691 %}
10688 10692 //
10689 10693 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10690 10694 // This instruction does not round to 24-bits
10691 10695 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10692 10696 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10693 10697 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
// Low cost biases the matcher toward this fused form over separate mul+add.
10694 10698 ins_cost(95);
10695 10699
10696 10700 format %{ "FLD $mem1 ===MACRO2===\n\t"
10697 10701 "FMUL ST,$src1 subsume mulFPR left load\n\t"
10698 10702 "FADD ST,$src2\n\t"
10699 10703 "FSTP $dst" %}
10700 10704 opcode(0xD9); /* LoadF D9 /0 */
10701 10705 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10702 10706 FMul_ST_reg(src1),
10703 10707 FAdd_ST_reg(src2),
10704 10708 Pop_Reg_FPR(dst) );
10705 10709 ins_pipe( fpu_reg_mem_reg_reg );
10706 10710 %}
10707 10711
10708 10712 // MACRO3 -- addFPR a mulFPR
10709 10713 // This instruction does not round to 24-bits. It is a '2-address'
10710 10714 // instruction in that the result goes back to src2. This eliminates
10711 10715 // a move from the macro; possibly the register allocator will have
10712 10716 // to add it back (and maybe not).
10713 10717 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10714 10718 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10715 10719 match(Set src2 (AddF (MulF src0 src1) src2));
10716 10720
10717 10721 format %{ "FLD $src0 ===MACRO3===\n\t"
10718 10722 "FMUL ST,$src1\n\t"
10719 10723 "FADDP $src2,ST" %}
10720 10724 opcode(0xD9); /* LoadF D9 /0 */
10721 10725 ins_encode( Push_Reg_FPR(src0),
10722 10726 FMul_ST_reg(src1),
10723 10727 FAddP_reg_ST(src2) );
10724 10728 ins_pipe( fpu_reg_reg_reg );
10725 10729 %}
10726 10730
10727 10731 // MACRO4 -- divFPR subFPR
10728 10732 // This instruction does not round to 24-bits
// Fused (src2 - src1) / src3; the sub and div are emitted by the
// subFPR_divFPR_encode helper on the value pushed by Push_Reg_FPR.
10729 10733 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10730 10734 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10731 10735 match(Set dst (DivF (SubF src2 src1) src3));
10732 10736
10733 10737 format %{ "FLD $src2 ===MACRO4===\n\t"
10734 10738 "FSUB ST,$src1\n\t"
10735 10739 "FDIV ST,$src3\n\t"
10736 10740 "FSTP $dst" %}
10737 10741 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10738 10742 ins_encode( Push_Reg_FPR(src2),
10739 10743 subFPR_divFPR_encode(src1,src3),
10740 10744 Pop_Reg_FPR(dst) );
10741 10745 ins_pipe( fpu_reg_reg_reg_reg );
10742 10746 %}
10743 10747
10744 10748 // Spill to obtain 24-bit precision
10745 10749 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10746 10750 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10747 10751 match(Set dst (DivF src1 src2));
10748 10752
10749 10753 format %{ "FDIV $dst,$src1,$src2" %}
10750 10754 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10751 10755 ins_encode( Push_Reg_FPR(src1),
10752 10756 OpcReg_FPR(src2),
10753 10757 Pop_Mem_FPR(dst) );
10754 10758 ins_pipe( fpu_mem_reg_reg );
10755 10759 %}
10756 10760 //
10757 10761 // This instruction does not round to 24-bits
10758 10762 instruct divFPR_reg(regFPR dst, regFPR src) %{
10759 10763 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10760 10764 match(Set dst (DivF dst src));
10761 10765
10762 10766 format %{ "FDIV $dst,$src" %}
10763 10767 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10764 10768 ins_encode( Push_Reg_FPR(src),
10765 10769 OpcP, RegOpc(dst) );
10766 10770 ins_pipe( fpu_reg_reg );
10767 10771 %}
10768 10772
10769 10773
// --- ModF (float remainder) via FPREM; emitModDPR() loops FPREM until the
// C2 status bit clears, clobbering EAX (FNSTSW AX) and EFLAGS (SAHF). ---
10770 10774 // Spill to obtain 24-bit precision
10771 10775 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10772 10776 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10773 10777 match(Set dst (ModF src1 src2));
10774 10778 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10775 10779
10776 10780 format %{ "FMOD $dst,$src1,$src2" %}
10777 10781 ins_encode( Push_Reg_Mod_DPR(src1, src2),
10778 10782 emitModDPR(),
10779 10783 Push_Result_Mod_DPR(src2),
10780 10784 Pop_Mem_FPR(dst));
10781 10785 ins_pipe( pipe_slow );
10782 10786 %}
10783 10787 //
10784 10788 // This instruction does not round to 24-bits
10785 10789 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10786 10790 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10787 10791 match(Set dst (ModF dst src));
10788 10792 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10789 10793
10790 10794 format %{ "FMOD $dst,$src" %}
10791 10795 ins_encode(Push_Reg_Mod_DPR(dst, src),
10792 10796 emitModDPR(),
10793 10797 Push_Result_Mod_DPR(src),
10794 10798 Pop_Reg_FPR(dst));
10795 10799 ins_pipe( pipe_slow );
10796 10800 %}
10797 10801
// SSE ModF: shuttle both XMM operands through a 4-byte stack slot onto the
// x87 stack, run the FPREM loop, then pull the result back and pop the
// leftover divisor (trailing FSTP ST0 / PopFPU) to leave the x87 stack clean.
10798 10802 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10799 10803 predicate(UseSSE>=1);
10800 10804 match(Set dst (ModF src0 src1));
10801 10805 effect(KILL rax, KILL cr);
10802 10806 format %{ "SUB ESP,4\t # FMOD\n"
10803 10807 "\tMOVSS [ESP+0],$src1\n"
10804 10808 "\tFLD_S [ESP+0]\n"
10805 10809 "\tMOVSS [ESP+0],$src0\n"
10806 10810 "\tFLD_S [ESP+0]\n"
10807 10811 "loop:\tFPREM\n"
10808 10812 "\tFWAIT\n"
10809 10813 "\tFNSTSW AX\n"
10810 10814 "\tSAHF\n"
10811 10815 "\tJP loop\n"
10812 10816 "\tFSTP_S [ESP+0]\n"
10813 10817 "\tMOVSS $dst,[ESP+0]\n"
10814 10818 "\tADD ESP,4\n"
10815 10819 "\tFSTP ST0\t # Restore FPU Stack"
10816 10820 %}
10817 10821 ins_cost(250);
10818 10822 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10819 10823 ins_pipe( pipe_slow );
10820 10824 %}
10821 10825
10822 10826
10823 10827 //----------Arithmetic Conversion Instructions---------------------------------
10824 10828 // The conversion operations are all Alpha sorted. Please keep it that way!
10825 10829 
// Round an x87 value to float precision by storing it to a stack slot as a
// 32-bit single (FST_S forces the narrowing). Other rules expand into this one.
10826 10830 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10827 10831 predicate(UseSSE==0);
10828 10832 match(Set dst (RoundFloat src));
10829 10833 ins_cost(125);
10830 10834 format %{ "FST_S $dst,$src\t# F-round" %}
10831 10835 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10832 10836 ins_pipe( fpu_mem_reg );
10833 10837 %}
10834 10838 
// Same idea for doubles: store-to-memory as a 64-bit double forces rounding
// away from the 80-bit extended precision held in the x87 register.
10835 10839 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10836 10840 predicate(UseSSE<=1);
10837 10841 match(Set dst (RoundDouble src));
10838 10842 ins_cost(125);
10839 10843 format %{ "FST_D $dst,$src\t# D-round" %}
10840 10844 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10841 10845 ins_pipe( fpu_mem_reg );
10842 10846 %}
10843 10847
10844 10848 // Force rounding to 24-bit precision and 6-bit exponent
// NOTE(review): "6-bit exponent" above looks like a long-standing typo —
// IEEE single precision has an 8-bit exponent; confirm before changing.
// ConvD2F with no SSE at all: expand into the store-to-stack-slot rounding rule.
10845 10849 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10846 10850 predicate(UseSSE==0);
10847 10851 match(Set dst (ConvD2F src));
10848 10852 format %{ "FST_S $dst,$src\t# F-round" %}
10849 10853 expand %{
10850 10854 roundFloat_mem_reg(dst,src);
10851 10855 %}
10852 10856 %}
10853 10857 
10854 10858 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F when UseSSE==1: source double is on the x87 stack, destination float
// is an XMM register, so bounce through a 4-byte scratch slot on ESP.
// The fld_s/fstp_s pair is only needed when src is not already at top of stack
// (FPR1); otherwise a plain fst_s suffices.
10855 10859 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10856 10860 predicate(UseSSE==1);
10857 10861 match(Set dst (ConvD2F src));
10858 10862 effect( KILL cr );
10859 10863 format %{ "SUB ESP,4\n\t"
10860 10864 "FST_S [ESP],$src\t# F-round\n\t"
10861 10865 "MOVSS $dst,[ESP]\n\t"
10862 10866 "ADD ESP,4" %}
10863 10867 ins_encode %{
10864 10868 __ subptr(rsp, 4);
10865 10869 if ($src$$reg != FPR1L_enc) {
10866 10870 __ fld_s($src$$reg-1);
10867 10871 __ fstp_s(Address(rsp, 0));
10868 10872 } else {
10869 10873 __ fst_s(Address(rsp, 0));
10870 10874 }
10871 10875 __ movflt($dst$$XMMRegister, Address(rsp, 0));
10872 10876 __ addptr(rsp, 4);
10873 10877 %}
10874 10878 ins_pipe( pipe_slow );
10875 10879 %}
10876 10880
10877 10881 // Force rounding double precision to single precision
// Pure-SSE2 ConvD2F: a single CVTSD2SS does the narrowing and rounding.
10878 10882 instruct convD2F_reg(regF dst, regD src) %{
10879 10883 predicate(UseSSE>=2);
10880 10884 match(Set dst (ConvD2F src));
10881 10885 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10882 10886 ins_encode %{
10883 10887 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10884 10888 %}
10885 10889 ins_pipe( pipe_slow );
10886 10890 %}
10887 10891
// ConvF2D in pure-FPU mode: a register-to-register x87 copy widens the value
// (every x87 register already holds 80-bit extended precision).
// NOTE(review): the format string says "FST_S" for an F->D widening; this
// only affects the printed disassembly, but verify the intended mnemonic.
10888 10892 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10889 10893 predicate(UseSSE==0);
10890 10894 match(Set dst (ConvF2D src));
10891 10895 format %{ "FST_S $dst,$src\t# D-round" %}
10892 10896 ins_encode( Pop_Reg_Reg_DPR(dst, src));
10893 10897 ins_pipe( fpu_reg_reg );
10894 10898 %}
10895 10899 
// ConvF2D when UseSSE==1 and the result must land in a stack slot:
// expand into the double store-rounding rule.
10896 10900 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10897 10901 predicate(UseSSE==1);
10898 10902 match(Set dst (ConvF2D src));
10899 10903 format %{ "FST_D $dst,$src\t# D-round" %}
10900 10904 expand %{
10901 10905 roundDouble_mem_reg(dst,src);
10902 10906 %}
10903 10907 %}
10904 10908
// ConvF2D when UseSSE==1: float source is in XMM, double result lives on the
// x87 stack, so spill the float through a 4-byte ESP slot and FLD_S it.
10905 10909 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10906 10910 predicate(UseSSE==1);
10907 10911 match(Set dst (ConvF2D src));
10908 10912 effect( KILL cr );
10909 10913 format %{ "SUB ESP,4\n\t"
10910 10914 "MOVSS [ESP] $src\n\t"
10911 10915 "FLD_S [ESP]\n\t"
10912 10916 "ADD ESP,4\n\t"
10913 10917 "FSTP $dst\t# D-round" %}
10914 10918 ins_encode %{
10915 10919 __ subptr(rsp, 4);
10916 10920 __ movflt(Address(rsp, 0), $src$$XMMRegister);
10917 10921 __ fld_s(Address(rsp, 0));
10918 10922 __ addptr(rsp, 4);
10919 10923 __ fstp_d($dst$$reg);
10920 10924 %}
10921 10925 ins_pipe( pipe_slow );
10922 10926 %}
10923 10927 
// Pure-SSE2 ConvF2D: single CVTSS2SD widening.
10924 10928 instruct convF2D_reg(regD dst, regF src) %{
10925 10929 predicate(UseSSE>=2);
10926 10930 match(Set dst (ConvF2D src));
10927 10931 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10928 10932 ins_encode %{
10929 10933 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10930 10934 %}
10931 10935 ins_pipe( pipe_slow );
10932 10936 %}
10933 10937
10934 10938 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 D2I: FISTP in truncate rounding mode; 0x80000000 is the x87 "invalid"
// sentinel (NaN/overflow), in which case the slow path calls the d2i_wrapper
// stub to produce Java-conformant results. Details live in DPR2I_encoding.
10935 10939 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10936 10940 predicate(UseSSE<=1);
10937 10941 match(Set dst (ConvD2I src));
10938 10942 effect( KILL tmp, KILL cr );
10939 10943 format %{ "FLD $src\t# Convert double to int \n\t"
10940 10944 "FLDCW trunc mode\n\t"
10941 10945 "SUB ESP,4\n\t"
10942 10946 "FISTp [ESP + #0]\n\t"
10943 10947 "FLDCW std/24-bit mode\n\t"
10944 10948 "POP EAX\n\t"
10945 10949 "CMP EAX,0x80000000\n\t"
10946 10950 "JNE,s fast\n\t"
10947 10951 "FLD_D $src\n\t"
10948 10952 "CALL d2i_wrapper\n"
10949 10953 "fast:" %}
10950 10954 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10951 10955 ins_pipe( pipe_slow );
10952 10956 %}
10953 10957
10954 10958 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 D2I: CVTTSD2SI truncates directly. The CPU signals NaN/overflow by
// producing 0x80000000; on that value the source is pushed back onto the x87
// stack (via an 8-byte ESP spill) and the d2i_wrapper stub fixes up the
// corner cases per Java semantics.
10955 10959 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10956 10960 predicate(UseSSE>=2);
10957 10961 match(Set dst (ConvD2I src));
10958 10962 effect( KILL tmp, KILL cr );
10959 10963 format %{ "CVTTSD2SI $dst, $src\n\t"
10960 10964 "CMP $dst,0x80000000\n\t"
10961 10965 "JNE,s fast\n\t"
10962 10966 "SUB ESP, 8\n\t"
10963 10967 "MOVSD [ESP], $src\n\t"
10964 10968 "FLD_D [ESP]\n\t"
10965 10969 "ADD ESP, 8\n\t"
10966 10970 "CALL d2i_wrapper\n"
10967 10971 "fast:" %}
10968 10972 ins_encode %{
10969 10973 Label fast;
10970 10974 __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10971 10975 __ cmpl($dst$$Register, 0x80000000);
10972 10976 __ jccb(Assembler::notEqual, fast);
10973 10977 __ subptr(rsp, 8);
10974 10978 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10975 10979 __ fld_d(Address(rsp, 0));
10976 10980 __ addptr(rsp, 8);
10977 10981 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10978 10982 __ bind(fast);
10979 10983 %}
10980 10984 ins_pipe( pipe_slow );
10981 10985 %}
10982 10986
// x87 D2L: 64-bit FISTP into EDX:EAX (eADXRegL). The sentinel for the slow
// path is the long value 0x8000000000000000 (EDX==0x80000000 && EAX==0);
// only then is the d2l_wrapper stub called for Java corner-case semantics.
10983 10987 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10984 10988 predicate(UseSSE<=1);
10985 10989 match(Set dst (ConvD2L src));
10986 10990 effect( KILL cr );
10987 10991 format %{ "FLD $src\t# Convert double to long\n\t"
10988 10992 "FLDCW trunc mode\n\t"
10989 10993 "SUB ESP,8\n\t"
10990 10994 "FISTp [ESP + #0]\n\t"
10991 10995 "FLDCW std/24-bit mode\n\t"
10992 10996 "POP EAX\n\t"
10993 10997 "POP EDX\n\t"
10994 10998 "CMP EDX,0x80000000\n\t"
10995 10999 "JNE,s fast\n\t"
10996 11000 "TEST EAX,EAX\n\t"
10997 11001 "JNE,s fast\n\t"
10998 11002 "FLD $src\n\t"
10999 11003 "CALL d2l_wrapper\n"
11000 11004 "fast:" %}
11001 11005 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
11002 11006 ins_pipe( pipe_slow );
11003 11007 %}
11004 11008
11005 11009 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 D2L: spill the XMM double to the stack, run the x87 FISTP-in-truncate-
// mode sequence, then restore the caller's rounding mode (24-bit vs standard,
// chosen by the compile-time 24-bit-FP mode flag). The 0x8000000000000000
// result routes to the d2l_wrapper stub for NaN/overflow handling.
11006 11010 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11007 11011 predicate (UseSSE>=2);
11008 11012 match(Set dst (ConvD2L src));
11009 11013 effect( KILL cr );
11010 11014 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11011 11015 "MOVSD [ESP],$src\n\t"
11012 11016 "FLD_D [ESP]\n\t"
11013 11017 "FLDCW trunc mode\n\t"
11014 11018 "FISTp [ESP + #0]\n\t"
11015 11019 "FLDCW std/24-bit mode\n\t"
11016 11020 "POP EAX\n\t"
11017 11021 "POP EDX\n\t"
11018 11022 "CMP EDX,0x80000000\n\t"
11019 11023 "JNE,s fast\n\t"
11020 11024 "TEST EAX,EAX\n\t"
11021 11025 "JNE,s fast\n\t"
11022 11026 "SUB ESP,8\n\t"
11023 11027 "MOVSD [ESP],$src\n\t"
11024 11028 "FLD_D [ESP]\n\t"
11025 11029 "ADD ESP,8\n\t"
11026 11030 "CALL d2l_wrapper\n"
11027 11031 "fast:" %}
11028 11032 ins_encode %{
11029 11033 Label fast;
11030 11034 __ subptr(rsp, 8);
11031 11035 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11032 11036 __ fld_d(Address(rsp, 0));
11033 11037 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11034 11038 __ fistp_d(Address(rsp, 0));
11035 11039 // Restore the rounding mode, mask the exception
11036 11040 if (Compile::current()->in_24_bit_fp_mode()) {
11037 11041 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11038 11042 } else {
11039 11043 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11040 11044 }
11041 11045 // Load the converted long, adjust CPU stack
11042 11046 __ pop(rax);
11043 11047 __ pop(rdx);
11044 11048 __ cmpl(rdx, 0x80000000);
11045 11049 __ jccb(Assembler::notEqual, fast);
11046 11050 __ testl(rax, rax);
11047 11051 __ jccb(Assembler::notEqual, fast);
11048 11052 __ subptr(rsp, 8);
11049 11053 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11050 11054 __ fld_d(Address(rsp, 0));
11051 11055 __ addptr(rsp, 8);
11052 11056 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11053 11057 __ bind(fast);
11054 11058 %}
11055 11059 ins_pipe( pipe_slow );
11056 11060 %}
11057 11061
11058 11062 // Convert a double to an int. Java semantics require we do complex
11059 11063 // manglations in the corner cases. So we set the rounding mode to
11060 11064 // 'zero', store the darned double down as an int, and reset the
11061 11065 // rounding mode to 'nearest'. The hardware stores a flag value down
11062 11066 // if we would overflow or converted a NAN; we check for this and
11063 11067 // and go the slow path if needed.
// x87 F2I (UseSSE==0). Floats and doubles share the same x87 representation,
// so this reuses the double encoding and the d2i_wrapper stub.
11064 11068 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11065 11069 predicate(UseSSE==0);
11066 11070 match(Set dst (ConvF2I src));
11067 11071 effect( KILL tmp, KILL cr );
11068 11072 format %{ "FLD $src\t# Convert float to int \n\t"
11069 11073 "FLDCW trunc mode\n\t"
11070 11074 "SUB ESP,4\n\t"
11071 11075 "FISTp [ESP + #0]\n\t"
11072 11076 "FLDCW std/24-bit mode\n\t"
11073 11077 "POP EAX\n\t"
11074 11078 "CMP EAX,0x80000000\n\t"
11075 11079 "JNE,s fast\n\t"
11076 11080 "FLD $src\n\t"
11077 11081 "CALL d2i_wrapper\n"
11078 11082 "fast:" %}
11079 11083 // DPR2I_encoding works for FPR2I
11080 11084 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11081 11085 ins_pipe( pipe_slow );
11082 11086 %}
11083 11087
11084 11088 // Convert a float in xmm to an int reg.
// SSE F2I: CVTTSS2SI truncates directly; on the 0x80000000 sentinel the float
// is respilled to the x87 stack and the shared d2i_wrapper stub (which also
// serves floats) handles NaN/overflow per Java semantics.
11085 11089 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11086 11090 predicate(UseSSE>=1);
11087 11091 match(Set dst (ConvF2I src));
11088 11092 effect( KILL tmp, KILL cr );
11089 11093 format %{ "CVTTSS2SI $dst, $src\n\t"
11090 11094 "CMP $dst,0x80000000\n\t"
11091 11095 "JNE,s fast\n\t"
11092 11096 "SUB ESP, 4\n\t"
11093 11097 "MOVSS [ESP], $src\n\t"
11094 11098 "FLD [ESP]\n\t"
11095 11099 "ADD ESP, 4\n\t"
11096 11100 "CALL d2i_wrapper\n"
11097 11101 "fast:" %}
11098 11102 ins_encode %{
11099 11103 Label fast;
11100 11104 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11101 11105 __ cmpl($dst$$Register, 0x80000000);
11102 11106 __ jccb(Assembler::notEqual, fast);
11103 11107 __ subptr(rsp, 4);
11104 11108 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11105 11109 __ fld_s(Address(rsp, 0));
11106 11110 __ addptr(rsp, 4);
11107 11111 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11108 11112 __ bind(fast);
11109 11113 %}
11110 11114 ins_pipe( pipe_slow );
11111 11115 %}
11112 11116
// x87 F2L (UseSSE==0): 64-bit FISTP into EDX:EAX, reusing the double long-
// conversion encoding; 0x8000000000000000 routes to the d2l_wrapper stub.
11113 11117 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11114 11118 predicate(UseSSE==0);
11115 11119 match(Set dst (ConvF2L src));
11116 11120 effect( KILL cr );
11117 11121 format %{ "FLD $src\t# Convert float to long\n\t"
11118 11122 "FLDCW trunc mode\n\t"
11119 11123 "SUB ESP,8\n\t"
11120 11124 "FISTp [ESP + #0]\n\t"
11121 11125 "FLDCW std/24-bit mode\n\t"
11122 11126 "POP EAX\n\t"
11123 11127 "POP EDX\n\t"
11124 11128 "CMP EDX,0x80000000\n\t"
11125 11129 "JNE,s fast\n\t"
11126 11130 "TEST EAX,EAX\n\t"
11127 11131 "JNE,s fast\n\t"
11128 11132 "FLD $src\n\t"
11129 11133 "CALL d2l_wrapper\n"
11130 11134 "fast:" %}
11131 11135 // DPR2L_encoding works for FPR2L
11132 11136 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11133 11137 ins_pipe( pipe_slow );
11134 11138 %}
11135 11139
11136 11140 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE F2L: spill the XMM float to the stack (8 bytes reserved so the FISTP
// result fits), run the x87 truncate-mode FISTP, restore the rounding mode,
// and call d2l_wrapper on the 0x8000000000000000 sentinel. Mirrors
// convD2L_reg_reg above but with 4-byte float spills on the slow path.
11137 11141 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11138 11142 predicate (UseSSE>=1);
11139 11143 match(Set dst (ConvF2L src));
11140 11144 effect( KILL cr );
11141 11145 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11142 11146 "MOVSS [ESP],$src\n\t"
11143 11147 "FLD_S [ESP]\n\t"
11144 11148 "FLDCW trunc mode\n\t"
11145 11149 "FISTp [ESP + #0]\n\t"
11146 11150 "FLDCW std/24-bit mode\n\t"
11147 11151 "POP EAX\n\t"
11148 11152 "POP EDX\n\t"
11149 11153 "CMP EDX,0x80000000\n\t"
11150 11154 "JNE,s fast\n\t"
11151 11155 "TEST EAX,EAX\n\t"
11152 11156 "JNE,s fast\n\t"
11153 11157 "SUB ESP,4\t# Convert float to long\n\t"
11154 11158 "MOVSS [ESP],$src\n\t"
11155 11159 "FLD_S [ESP]\n\t"
11156 11160 "ADD ESP,4\n\t"
11157 11161 "CALL d2l_wrapper\n"
11158 11162 "fast:" %}
11159 11163 ins_encode %{
11160 11164 Label fast;
11161 11165 __ subptr(rsp, 8);
11162 11166 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11163 11167 __ fld_s(Address(rsp, 0));
11164 11168 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11165 11169 __ fistp_d(Address(rsp, 0));
11166 11170 // Restore the rounding mode, mask the exception
11167 11171 if (Compile::current()->in_24_bit_fp_mode()) {
11168 11172 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11169 11173 } else {
11170 11174 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11171 11175 }
11172 11176 // Load the converted long, adjust CPU stack
11173 11177 __ pop(rax);
11174 11178 __ pop(rdx);
11175 11179 __ cmpl(rdx, 0x80000000);
11176 11180 __ jccb(Assembler::notEqual, fast);
11177 11181 __ testl(rax, rax);
11178 11182 __ jccb(Assembler::notEqual, fast);
11179 11183 __ subptr(rsp, 4);
11180 11184 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11181 11185 __ fld_s(Address(rsp, 0));
11182 11186 __ addptr(rsp, 4);
11183 11187 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11184 11188 __ bind(fast);
11185 11189 %}
11186 11190 ins_pipe( pipe_slow );
11187 11191 %}
11188 11192
// I2D in x87 mode: FILD the int from its stack slot, FSTP to the x87 dest.
11189 11193 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11190 11194 predicate( UseSSE<=1 );
11191 11195 match(Set dst (ConvI2D src));
11192 11196 format %{ "FILD $src\n\t"
11193 11197 "FSTP $dst" %}
11194 11198 opcode(0xDB, 0x0); /* DB /0 */
11195 11199 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11196 11200 ins_pipe( fpu_reg_mem );
11197 11201 %}
11198 11202 
// SSE2 I2D from a GPR (CVTSI2SD path; the UseXmmI2D variant is below).
11199 11203 instruct convI2D_reg(regD dst, eRegI src) %{
11200 11204 predicate( UseSSE>=2 && !UseXmmI2D );
11201 11205 match(Set dst (ConvI2D src));
11202 11206 format %{ "CVTSI2SD $dst,$src" %}
11203 11207 ins_encode %{
11204 11208 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11205 11209 %}
11206 11210 ins_pipe( pipe_slow );
11207 11211 %}
11208 11212 
// SSE2 I2D fused with the memory load of the int.
11209 11213 instruct convI2D_mem(regD dst, memory mem) %{
11210 11214 predicate( UseSSE>=2 );
11211 11215 match(Set dst (ConvI2D (LoadI mem)));
11212 11216 format %{ "CVTSI2SD $dst,$mem" %}
11213 11217 ins_encode %{
11214 11218 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11215 11219 %}
11216 11220 ins_pipe( pipe_slow );
11217 11221 %}
11218 11222
// I2D staying inside the XMM domain (UseXmmI2D): MOVD the int into the XMM
// register, then CVTDQ2PD — avoids the GPR->XMM CVTSI2SD form.
11219 11223 instruct convXI2D_reg(regD dst, eRegI src)
11220 11224 %{
11221 11225 predicate( UseSSE>=2 && UseXmmI2D );
11222 11226 match(Set dst (ConvI2D src));
11223 11227 
11224 11228 format %{ "MOVD $dst,$src\n\t"
11225 11229 "CVTDQ2PD $dst,$dst\t# i2d" %}
11226 11230 ins_encode %{
11227 11231 __ movdl($dst$$XMMRegister, $src$$Register);
11228 11232 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11229 11233 %}
11230 11234 ins_pipe(pipe_slow); // XXX
11231 11235 %}
11232 11236 
// x87 I2D fused with the memory load; excluded in 24-bit mode, where the
// result would need an explicit rounding store.
11233 11237 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11234 11238 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11235 11239 match(Set dst (ConvI2D (LoadI mem)));
11236 11240 format %{ "FILD $mem\n\t"
11237 11241 "FSTP $dst" %}
11238 11242 opcode(0xDB); /* DB /0 */
11239 11243 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11240 11244 Pop_Reg_DPR(dst));
11241 11245 ins_pipe( fpu_reg_mem );
11242 11246 %}
11243 11247
11244 11248 // Convert a byte to a float; no rounding step needed.
// Matches ConvI2F only when the input is provably (x & 255), i.e. a byte-
// range value, so the float result is exact and no 24-bit rounding is needed.
11245 11249 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11246 11250 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11247 11251 match(Set dst (ConvI2F src));
11248 11252 format %{ "FILD $src\n\t"
11249 11253 "FSTP $dst" %}
11250 11254 
11251 11255 opcode(0xDB, 0x0); /* DB /0 */
11252 11256 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11253 11257 ins_pipe( fpu_reg_mem );
11254 11258 %}
11255 11259 
11256 11260 // In 24-bit mode, force exponent rounding by storing back out
11257 11261 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11258 11262 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11259 11263 match(Set dst (ConvI2F src));
11260 11264 ins_cost(200);
11261 11265 format %{ "FILD $src\n\t"
11262 11266 "FSTP_S $dst" %}
11263 11267 opcode(0xDB, 0x0); /* DB /0 */
11264 11268 ins_encode( Push_Mem_I(src),
11265 11269 Pop_Mem_FPR(dst));
11266 11270 ins_pipe( fpu_mem_mem );
11267 11271 %}
11268 11272 
11269 11273 // In 24-bit mode, force exponent rounding by storing back out
// Same as convI2FPR_SSF but fused with the memory load of the int.
11270 11274 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11271 11275 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11272 11276 match(Set dst (ConvI2F (LoadI mem)));
11273 11277 ins_cost(200);
11274 11278 format %{ "FILD $mem\n\t"
11275 11279 "FSTP_S $dst" %}
11276 11280 opcode(0xDB); /* DB /0 */
11277 11281 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11278 11282 Pop_Mem_FPR(dst));
11279 11283 ins_pipe( fpu_mem_mem );
11280 11284 %}
11281 11285
11282 11286 // This instruction does not round to 24-bits
// x87 I2F outside 24-bit mode: plain FILD/FSTP, result stays in an x87 reg.
11283 11287 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11284 11288 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11285 11289 match(Set dst (ConvI2F src));
11286 11290 format %{ "FILD $src\n\t"
11287 11291 "FSTP $dst" %}
11288 11292 opcode(0xDB, 0x0); /* DB /0 */
11289 11293 ins_encode( Push_Mem_I(src),
11290 11294 Pop_Reg_FPR(dst));
11291 11295 ins_pipe( fpu_reg_mem );
11292 11296 %}
11293 11297 
11294 11298 // This instruction does not round to 24-bits
// Same, fused with the memory load of the int.
11295 11299 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11296 11300 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11297 11301 match(Set dst (ConvI2F (LoadI mem)));
11298 11302 format %{ "FILD $mem\n\t"
11299 11303 "FSTP $dst" %}
11300 11304 opcode(0xDB); /* DB /0 */
11301 11305 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11302 11306 Pop_Reg_FPR(dst));
11303 11307 ins_pipe( fpu_reg_mem );
11304 11308 %}
11305 11309
11306 11310 // Convert an int to a float in xmm; no rounding step needed.
11307 11311 instruct convI2F_reg(regF dst, eRegI src) %{
11308 11312 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11309 11313 match(Set dst (ConvI2F src));
11310 11314 format %{ "CVTSI2SS $dst, $src" %}
11311 11315 ins_encode %{
11312 11316 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11313 11317 %}
11314 11318 ins_pipe( pipe_slow );
11315 11319 %}
11316 11320 
// I2F staying in the XMM domain (UseXmmI2F): MOVD then CVTDQ2PS, the
// float analogue of convXI2D_reg above.
11317 11321 instruct convXI2F_reg(regF dst, eRegI src)
11318 11322 %{
11319 11323 predicate( UseSSE>=2 && UseXmmI2F );
11320 11324 match(Set dst (ConvI2F src));
11321 11325 
11322 11326 format %{ "MOVD $dst,$src\n\t"
11323 11327 "CVTDQ2PS $dst,$dst\t# i2f" %}
11324 11328 ins_encode %{
11325 11329 __ movdl($dst$$XMMRegister, $src$$Register);
11326 11330 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11327 11331 %}
11328 11332 ins_pipe(pipe_slow); // XXX
11329 11333 %}
11330 11334
// Sign-extending I2L: copy the int into both halves, then arithmetic-shift
// the high half by 31 to propagate the sign bit.
11331 11335 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11332 11336 match(Set dst (ConvI2L src));
11333 11337 effect(KILL cr);
11334 11338 ins_cost(375);
11335 11339 format %{ "MOV $dst.lo,$src\n\t"
11336 11340 "MOV $dst.hi,$src\n\t"
11337 11341 "SAR $dst.hi,31" %}
11338 11342 ins_encode(convert_int_long(dst,src));
11339 11343 ins_pipe( ialu_reg_reg_long );
11340 11344 %}
11341 11345 
11342 11346 // Zero-extend convert int to long
// Matches (ConvI2L src) & 0xFFFFFFFF directly: low half copied, high half
// XOR-cleared — cheaper than sign-extend + mask.
11343 11347 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
11344 11348 match(Set dst (AndL (ConvI2L src) mask) );
11345 11349 effect( KILL flags );
11346 11350 ins_cost(250);
11347 11351 format %{ "MOV $dst.lo,$src\n\t"
11348 11352 "XOR $dst.hi,$dst.hi" %}
11349 11353 opcode(0x33); // XOR
11350 11354 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11351 11355 ins_pipe( ialu_reg_reg_long );
11352 11356 %}
11353 11357 
11354 11358 // Zero-extend long
// Long & 0xFFFFFFFF: same copy-low / clear-high pattern for a long source.
11355 11359 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11356 11360 match(Set dst (AndL src mask) );
11357 11361 effect( KILL flags );
11358 11362 ins_cost(250);
11359 11363 format %{ "MOV $dst.lo,$src.lo\n\t"
11360 11364 "XOR $dst.hi,$dst.hi\n\t" %}
11361 11365 opcode(0x33); // XOR
11362 11366 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11363 11367 ins_pipe( ialu_reg_reg_long );
11364 11368 %}
11365 11369
// L2D in x87 mode: push both halves of the long, 64-bit FILD from [ESP],
// then store the rounded double into the destination stack slot.
// (ia32 SSE2 has no 64-bit-int -> double conversion, so x87 is used.)
11366 11370 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11367 11371 predicate (UseSSE<=1);
11368 11372 match(Set dst (ConvL2D src));
11369 11373 effect( KILL cr );
11370 11374 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11371 11375 "PUSH $src.lo\n\t"
11372 11376 "FILD ST,[ESP + #0]\n\t"
11373 11377 "ADD ESP,8\n\t"
11374 11378 "FSTP_D $dst\t# D-round" %}
11375 11379 opcode(0xDF, 0x5); /* DF /5 */
11376 11380 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11377 11381 ins_pipe( pipe_slow );
11378 11382 %}
11379 11383 
// L2D with SSE2 destination: x87 FILD/FSTP through the stack, then MOVSD
// the rounded result into the XMM register.
11380 11384 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11381 11385 predicate (UseSSE>=2);
11382 11386 match(Set dst (ConvL2D src));
11383 11387 effect( KILL cr );
11384 11388 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11385 11389 "PUSH $src.lo\n\t"
11386 11390 "FILD_D [ESP]\n\t"
11387 11391 "FSTP_D [ESP]\n\t"
11388 11392 "MOVSD $dst,[ESP]\n\t"
11389 11393 "ADD ESP,8" %}
11390 11394 opcode(0xDF, 0x5); /* DF /5 */
11391 11395 ins_encode(convert_long_double2(src), Push_ResultD(dst));
11392 11396 ins_pipe( pipe_slow );
11393 11397 %}
11394 11398
// L2F with SSE destination: x87 FILD, FSTP_S (which performs the F-round),
// then MOVSS into the XMM register.
11395 11399 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11396 11400 predicate (UseSSE>=1);
11397 11401 match(Set dst (ConvL2F src));
11398 11402 effect( KILL cr );
11399 11403 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11400 11404 "PUSH $src.lo\n\t"
11401 11405 "FILD_D [ESP]\n\t"
11402 11406 "FSTP_S [ESP]\n\t"
11403 11407 "MOVSS $dst,[ESP]\n\t"
11404 11408 "ADD ESP,8" %}
11405 11409 opcode(0xDF, 0x5); /* DF /5 */
11406 11410 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11407 11411 ins_pipe( pipe_slow );
11408 11412 %}
11409 11413 
// L2F in x87 mode (no predicate: applies when the SSE variant above does
// not match): FILD the long, FSTP_S to the stack slot for the F-round.
11410 11414 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11411 11415 match(Set dst (ConvL2F src));
11412 11416 effect( KILL cr );
11413 11417 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11414 11418 "PUSH $src.lo\n\t"
11415 11419 "FILD ST,[ESP + #0]\n\t"
11416 11420 "ADD ESP,8\n\t"
11417 11421 "FSTP_S $dst\t# F-round" %}
11418 11422 opcode(0xDF, 0x5); /* DF /5 */
11419 11423 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11420 11424 ins_pipe( pipe_slow );
11421 11425 %}
11422 11426 
// L2I: simply copy the low 32-bit half; the high half is discarded.
11423 11427 instruct convL2I_reg( eRegI dst, eRegL src ) %{
11424 11428 match(Set dst (ConvL2I src));
11425 11429 effect( DEF dst, USE src );
11426 11430 format %{ "MOV $dst,$src.lo" %}
11427 11431 ins_encode(enc_CopyL_Lo(dst,src));
11428 11432 ins_pipe( ialu_reg_reg );
11429 11433 %}
11430 11434
11431 11435
// MoveF2I family: bit-for-bit reinterpretation of a float as an int
// (Float.floatToRawIntBits). No numeric conversion is performed.
// Float already spilled: a plain 32-bit integer load reads its bits.
11432 11436 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11433 11437 match(Set dst (MoveF2I src));
11434 11438 effect( DEF dst, USE src );
11435 11439 ins_cost(100);
11436 11440 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
11437 11441 ins_encode %{
11438 11442 __ movl($dst$$Register, Address(rsp, $src$$disp));
11439 11443 %}
11440 11444 ins_pipe( ialu_reg_mem );
11441 11445 %}
11442 11446 
// x87 float register -> int stack slot: FST_S writes the raw single bits.
11443 11447 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11444 11448 predicate(UseSSE==0);
11445 11449 match(Set dst (MoveF2I src));
11446 11450 effect( DEF dst, USE src );
11447 11451 
11448 11452 ins_cost(125);
11449 11453 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
11450 11454 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11451 11455 ins_pipe( fpu_mem_reg );
11452 11456 %}
11453 11457 
// XMM float -> int stack slot: MOVSS stores the raw bits.
11454 11458 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11455 11459 predicate(UseSSE>=1);
11456 11460 match(Set dst (MoveF2I src));
11457 11461 effect( DEF dst, USE src );
11458 11462 
11459 11463 ins_cost(95);
11460 11464 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
11461 11465 ins_encode %{
11462 11466 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11463 11467 %}
11464 11468 ins_pipe( pipe_slow );
11465 11469 %}
11466 11470 
// XMM float -> GPR directly via MOVD (cheapest form, SSE2 only).
11467 11471 instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
11468 11472 predicate(UseSSE>=2);
11469 11473 match(Set dst (MoveF2I src));
11470 11474 effect( DEF dst, USE src );
11471 11475 ins_cost(85);
11472 11476 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
11473 11477 ins_encode %{
11474 11478 __ movdl($dst$$Register, $src$$XMMRegister);
11475 11479 %}
11476 11480 ins_pipe( pipe_slow );
11477 11481 %}
11478 11482
// MoveI2F family: bit-for-bit reinterpretation of an int as a float
// (Float.intBitsToFloat). No numeric conversion is performed.
// Int -> float stack slot: plain 32-bit integer store of the raw bits.
11479 11483 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
11480 11484 match(Set dst (MoveI2F src));
11481 11485 effect( DEF dst, USE src );
11482 11486 
11483 11487 ins_cost(100);
11484 11488 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
11485 11489 ins_encode %{
11486 11490 __ movl(Address(rsp, $dst$$disp), $src$$Register);
11487 11491 %}
11488 11492 ins_pipe( ialu_mem_reg );
11489 11493 %}
11490 11494 
11491 11495 
// Int stack slot -> x87 register: FLD_S loads the bits as a single.
11492 11496 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11493 11497 predicate(UseSSE==0);
11494 11498 match(Set dst (MoveI2F src));
11495 11499 effect(DEF dst, USE src);
11496 11500 
11497 11501 ins_cost(125);
11498 11502 format %{ "FLD_S $src\n\t"
11499 11503 "FSTP $dst\t# MoveI2F_stack_reg" %}
11500 11504 opcode(0xD9); /* D9 /0, FLD m32real */
11501 11505 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11502 11506 Pop_Reg_FPR(dst) );
11503 11507 ins_pipe( fpu_reg_mem );
11504 11508 %}
11505 11509 
// Int stack slot -> XMM float via MOVSS.
11506 11510 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11507 11511 predicate(UseSSE>=1);
11508 11512 match(Set dst (MoveI2F src));
11509 11513 effect( DEF dst, USE src );
11510 11514 
11511 11515 ins_cost(95);
11512 11516 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
11513 11517 ins_encode %{
11514 11518 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11515 11519 %}
11516 11520 ins_pipe( pipe_slow );
11517 11521 %}
11518 11522 
// GPR -> XMM float directly via MOVD (cheapest form, SSE2 only).
11519 11523 instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
11520 11524 predicate(UseSSE>=2);
11521 11525 match(Set dst (MoveI2F src));
11522 11526 effect( DEF dst, USE src );
11523 11527 
11524 11528 ins_cost(85);
11525 11529 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
11526 11530 ins_encode %{
11527 11531 __ movdl($dst$$XMMRegister, $src$$Register);
11528 11532 %}
11529 11533 ins_pipe( pipe_slow );
11530 11534 %}
11531 11535
// MoveD2L family: bit-for-bit reinterpretation of a double as a long
// (Double.doubleToRawLongBits).
// Double already spilled: read it as two 32-bit halves into a long pair.
11532 11536 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11533 11537 match(Set dst (MoveD2L src));
11534 11538 effect(DEF dst, USE src);
11535 11539 
11536 11540 ins_cost(250);
11537 11541 format %{ "MOV $dst.lo,$src\n\t"
11538 11542 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11539 11543 opcode(0x8B, 0x8B);
11540 11544 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11541 11545 ins_pipe( ialu_mem_long_reg );
11542 11546 %}
11543 11547 
// x87 double register -> long stack slot: FST_D writes the raw 64 bits.
11544 11548 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11545 11549 predicate(UseSSE<=1);
11546 11550 match(Set dst (MoveD2L src));
11547 11551 effect(DEF dst, USE src);
11548 11552 
11549 11553 ins_cost(125);
11550 11554 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
11551 11555 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11552 11556 ins_pipe( fpu_mem_reg );
11553 11557 %}
11554 11558 
// XMM double -> long stack slot via MOVSD.
11555 11559 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11556 11560 predicate(UseSSE>=2);
11557 11561 match(Set dst (MoveD2L src));
11558 11562 effect(DEF dst, USE src);
11559 11563 ins_cost(95);
11560 11564 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
11561 11565 ins_encode %{
11562 11566 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11563 11567 %}
11564 11568 ins_pipe( pipe_slow );
11565 11569 %}
11566 11570 
// XMM double -> long register pair without touching memory: MOVD extracts
// the low word, PSHUFLW swaps the 32-bit halves in a temp so a second MOVD
// can extract the high word.
11567 11571 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11568 11572 predicate(UseSSE>=2);
11569 11573 match(Set dst (MoveD2L src));
11570 11574 effect(DEF dst, USE src, TEMP tmp);
11571 11575 ins_cost(85);
11572 11576 format %{ "MOVD $dst.lo,$src\n\t"
11573 11577 "PSHUFLW $tmp,$src,0x4E\n\t"
11574 11578 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11575 11579 ins_encode %{
11576 11580 __ movdl($dst$$Register, $src$$XMMRegister);
11577 11581 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11578 11582 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11579 11583 %}
11580 11584 ins_pipe( pipe_slow );
11581 11585 %}
11582 11586
// MoveL2D family: bit-for-bit reinterpretation of a long as a double
// (Double.longBitsToDouble).
// Long register pair -> double stack slot: two 32-bit integer stores.
11583 11587 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11584 11588 match(Set dst (MoveL2D src));
11585 11589 effect(DEF dst, USE src);
11586 11590 
11587 11591 ins_cost(200);
11588 11592 format %{ "MOV $dst,$src.lo\n\t"
11589 11593 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11590 11594 opcode(0x89, 0x89);
11591 11595 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11592 11596 ins_pipe( ialu_mem_long_reg );
11593 11597 %}
11594 11598 
11595 11599 
// Long stack slot -> x87 register: FLD_D loads the raw 64 bits as a double.
11596 11600 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11597 11601 predicate(UseSSE<=1);
11598 11602 match(Set dst (MoveL2D src));
11599 11603 effect(DEF dst, USE src);
11600 11604 ins_cost(125);
11601 11605 
11602 11606 format %{ "FLD_D $src\n\t"
11603 11607 "FSTP $dst\t# MoveL2D_stack_reg" %}
11604 11608 opcode(0xDD); /* DD /0, FLD m64real */
11605 11609 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11606 11610 Pop_Reg_DPR(dst) );
11607 11611 ins_pipe( fpu_reg_mem );
11608 11612 %}
11609 11613 
11610 11614 
// Long stack slot -> XMM double when MOVSD (load-and-clear-upper) is fast.
11611 11615 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11612 11616 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11613 11617 match(Set dst (MoveL2D src));
11614 11618 effect(DEF dst, USE src);
11615 11619 
11616 11620 ins_cost(95);
11617 11621 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11618 11622 ins_encode %{
11619 11623 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11620 11624 %}
11621 11625 ins_pipe( pipe_slow );
11622 11626 %}
11623 11627 
// Variant for CPUs where MOVLPD (partial load, upper half preserved) beats
// MOVSD; movdbl() picks the instruction from UseXmmLoadAndClearUpper.
11624 11628 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11625 11629 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11626 11630 match(Set dst (MoveL2D src));
11627 11631 effect(DEF dst, USE src);
11628 11632 
11629 11633 ins_cost(95);
11630 11634 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11631 11635 ins_encode %{
11632 11636 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11633 11637 %}
11634 11638 ins_pipe( pipe_slow );
11635 11639 %}
11636 11640 
// Long register pair -> XMM double without memory: MOVD each half into an
// XMM register and PUNPCKLDQ them together (dst is TEMP because it is
// written before src is fully consumed).
11637 11641 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11638 11642 predicate(UseSSE>=2);
11639 11643 match(Set dst (MoveL2D src));
11640 11644 effect(TEMP dst, USE src, TEMP tmp);
11641 11645 ins_cost(85);
11642 11646 format %{ "MOVD $dst,$src.lo\n\t"
11643 11647 "MOVD $tmp,$src.hi\n\t"
11644 11648 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11645 11649 ins_encode %{
11646 11650 __ movdl($dst$$XMMRegister, $src$$Register);
11647 11651 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11648 11652 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11649 11653 %}
11650 11654 ins_pipe( pipe_slow );
11651 11655 %}
11652 11656
11653 11657 // Replicate scalar to packed byte (1 byte) values in xmm
// Broadcast the low byte of an XMM source across 8 lanes: PUNPCKLBW doubles
// the byte into a word, PSHUFLW 0x00 then splats that word across the low
// four word lanes.
11654 11658 instruct Repl8B_reg(regD dst, regD src) %{
11655 11659 predicate(UseSSE>=2);
11656 11660 match(Set dst (Replicate8B src));
11657 11661 format %{ "MOVDQA $dst,$src\n\t"
11658 11662 "PUNPCKLBW $dst,$dst\n\t"
11659 11663 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11660 11664 ins_encode %{
11661 11665 if ($dst$$reg != $src$$reg) {
11662 11666 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
11663 11667 }
11664 11668 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11665 11669 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11666 11670 %}
11667 11671 ins_pipe( pipe_slow );
11668 11672 %}
11669 11673 
11670 11674 // Replicate scalar to packed byte (1 byte) values in xmm
// Same broadcast but starting from a GPR: MOVD first, then the same
// PUNPCKLBW/PSHUFLW splat.
11671 11675 instruct Repl8B_eRegI(regD dst, eRegI src) %{
11672 11676 predicate(UseSSE>=2);
11673 11677 match(Set dst (Replicate8B src));
11674 11678 format %{ "MOVD $dst,$src\n\t"
11675 11679 "PUNPCKLBW $dst,$dst\n\t"
11676 11680 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11677 11681 ins_encode %{
11678 11682 __ movdl($dst$$XMMRegister, $src$$Register);
11679 11683 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11680 11684 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11681 11685 %}
11682 11686 ins_pipe( pipe_slow );
11683 11687 %}
11684 11688 
11685 11689 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Zero splat is just PXOR of the destination with itself.
11686 11690 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11687 11691 predicate(UseSSE>=2);
11688 11692 match(Set dst (Replicate8B zero));
11689 11693 format %{ "PXOR $dst,$dst\t! replicate8B" %}
11690 11694 ins_encode %{
11691 11695 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11692 11696 %}
11693 11697 ins_pipe( fpu_reg_reg );
11694 11698 %}
11695 11699
11696 11700 // Replicate scalar to packed short (2 byte) values in xmm
11697 11701 instruct Repl4S_reg(regD dst, regD src) %{
11698 11702 predicate(UseSSE>=2);
11699 11703 match(Set dst (Replicate4S src));
11700 11704 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11701 11705 ins_encode %{
11702 11706 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11703 11707 %}
11704 11708 ins_pipe( fpu_reg_reg );
11705 11709 %}
11706 11710
11707 11711 // Replicate scalar to packed short (2 byte) values in xmm
11708 11712 instruct Repl4S_eRegI(regD dst, eRegI src) %{
11709 11713 predicate(UseSSE>=2);
11710 11714 match(Set dst (Replicate4S src));
11711 11715 format %{ "MOVD $dst,$src\n\t"
11712 11716 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11713 11717 ins_encode %{
11714 11718 __ movdl($dst$$XMMRegister, $src$$Register);
11715 11719 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11716 11720 %}
11717 11721 ins_pipe( fpu_reg_reg );
11718 11722 %}
11719 11723
11720 11724 // Replicate scalar zero to packed short (2 byte) values in xmm
11721 11725 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11722 11726 predicate(UseSSE>=2);
11723 11727 match(Set dst (Replicate4S zero));
11724 11728 format %{ "PXOR $dst,$dst\t! replicate4S" %}
11725 11729 ins_encode %{
11726 11730 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11727 11731 %}
11728 11732 ins_pipe( fpu_reg_reg );
11729 11733 %}
11730 11734
11731 11735 // Replicate scalar to packed char (2 byte) values in xmm
11732 11736 instruct Repl4C_reg(regD dst, regD src) %{
11733 11737 predicate(UseSSE>=2);
11734 11738 match(Set dst (Replicate4C src));
11735 11739 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11736 11740 ins_encode %{
11737 11741 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11738 11742 %}
11739 11743 ins_pipe( fpu_reg_reg );
11740 11744 %}
11741 11745
11742 11746 // Replicate scalar to packed char (2 byte) values in xmm
11743 11747 instruct Repl4C_eRegI(regD dst, eRegI src) %{
11744 11748 predicate(UseSSE>=2);
11745 11749 match(Set dst (Replicate4C src));
11746 11750 format %{ "MOVD $dst,$src\n\t"
11747 11751 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11748 11752 ins_encode %{
11749 11753 __ movdl($dst$$XMMRegister, $src$$Register);
11750 11754 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11751 11755 %}
11752 11756 ins_pipe( fpu_reg_reg );
11753 11757 %}
11754 11758
11755 11759 // Replicate scalar zero to packed char (2 byte) values in xmm
11756 11760 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11757 11761 predicate(UseSSE>=2);
11758 11762 match(Set dst (Replicate4C zero));
11759 11763 format %{ "PXOR $dst,$dst\t! replicate4C" %}
11760 11764 ins_encode %{
11761 11765 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11762 11766 %}
11763 11767 ins_pipe( fpu_reg_reg );
11764 11768 %}
11765 11769
11766 11770 // Replicate scalar to packed integer (4 byte) values in xmm
11767 11771 instruct Repl2I_reg(regD dst, regD src) %{
11768 11772 predicate(UseSSE>=2);
11769 11773 match(Set dst (Replicate2I src));
11770 11774 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11771 11775 ins_encode %{
11772 11776 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11773 11777 %}
11774 11778 ins_pipe( fpu_reg_reg );
11775 11779 %}
11776 11780
11777 11781 // Replicate scalar to packed integer (4 byte) values in xmm
11778 11782 instruct Repl2I_eRegI(regD dst, eRegI src) %{
11779 11783 predicate(UseSSE>=2);
11780 11784 match(Set dst (Replicate2I src));
11781 11785 format %{ "MOVD $dst,$src\n\t"
11782 11786 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11783 11787 ins_encode %{
11784 11788 __ movdl($dst$$XMMRegister, $src$$Register);
11785 11789 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11786 11790 %}
11787 11791 ins_pipe( fpu_reg_reg );
11788 11792 %}
11789 11793
11790 11794 // Replicate scalar zero to packed integer (4 byte) values in xmm
11791 11795 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11792 11796 predicate(UseSSE>=2);
11793 11797 match(Set dst (Replicate2I zero));
11794 11798 format %{ "PXOR $dst,$dst\t! replicate2I" %}
11795 11799 ins_encode %{
11796 11800 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11797 11801 %}
11798 11802 ins_pipe( fpu_reg_reg );
11799 11803 %}
11800 11804
11801 11805 // Replicate scalar to packed single precision floating point values in xmm
11802 11806 instruct Repl2F_reg(regD dst, regD src) %{
11803 11807 predicate(UseSSE>=2);
11804 11808 match(Set dst (Replicate2F src));
11805 11809 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11806 11810 ins_encode %{
11807 11811 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
11808 11812 %}
11809 11813 ins_pipe( fpu_reg_reg );
11810 11814 %}
11811 11815
11812 11816 // Replicate scalar to packed single precision floating point values in xmm
11813 11817 instruct Repl2F_regF(regD dst, regF src) %{
11814 11818 predicate(UseSSE>=2);
11815 11819 match(Set dst (Replicate2F src));
11816 11820 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11817 11821 ins_encode %{
11818 11822 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
11819 11823 %}
11820 11824 ins_pipe( fpu_reg_reg );
11821 11825 %}
11822 11826
11823 11827 // Replicate scalar zero to packed single precision floating point values in xmm
11824 11828 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11825 11829 predicate(UseSSE>=2);
11826 11830 match(Set dst (Replicate2F zero));
11827 11831 format %{ "PXOR $dst,$dst\t! replicate2F" %}
11828 11832 ins_encode %{
11829 11833 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11830 11834 %}
11831 11835 ins_pipe( fpu_reg_reg );
11832 11836 %}
11833 11837
11834 11838 // =======================================================================
11835 11839 // fast clearing of an array
11836 11840 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11837 11841 match(Set dummy (ClearArray cnt base));
11838 11842 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11839 11843 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
11840 11844 "XOR EAX,EAX\n\t"
11841 11845 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11842 11846 opcode(0,0x4);
11843 11847 ins_encode( Opcode(0xD1), RegOpc(ECX),
11844 11848 OpcRegReg(0x33,EAX,EAX),
11845 11849 Opcode(0xF3), Opcode(0xAB) );
11846 11850 ins_pipe( pipe_slow );
11847 11851 %}
11848 11852
11849 11853 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11850 11854 eAXRegI result, regD tmp1, eFlagsReg cr) %{
11851 11855 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11852 11856 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11853 11857
11854 11858 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11855 11859 ins_encode %{
11856 11860 __ string_compare($str1$$Register, $str2$$Register,
11857 11861 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11858 11862 $tmp1$$XMMRegister);
11859 11863 %}
11860 11864 ins_pipe( pipe_slow );
11861 11865 %}
11862 11866
11863 11867 // fast string equals
11864 11868 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11865 11869 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11866 11870 match(Set result (StrEquals (Binary str1 str2) cnt));
11867 11871 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11868 11872
11869 11873 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
11870 11874 ins_encode %{
11871 11875 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11872 11876 $cnt$$Register, $result$$Register, $tmp3$$Register,
11873 11877 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11874 11878 %}
11875 11879 ins_pipe( pipe_slow );
11876 11880 %}
11877 11881
11878 11882 // fast search of substring with known size.
11879 11883 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11880 11884 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11881 11885 predicate(UseSSE42Intrinsics);
11882 11886 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11883 11887 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11884 11888
11885 11889 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
11886 11890 ins_encode %{
11887 11891 int icnt2 = (int)$int_cnt2$$constant;
11888 11892 if (icnt2 >= 8) {
11889 11893 // IndexOf for constant substrings with size >= 8 elements
11890 11894 // which don't need to be loaded through stack.
11891 11895 __ string_indexofC8($str1$$Register, $str2$$Register,
11892 11896 $cnt1$$Register, $cnt2$$Register,
11893 11897 icnt2, $result$$Register,
11894 11898 $vec$$XMMRegister, $tmp$$Register);
11895 11899 } else {
11896 11900 // Small strings are loaded through stack if they cross page boundary.
11897 11901 __ string_indexof($str1$$Register, $str2$$Register,
11898 11902 $cnt1$$Register, $cnt2$$Register,
11899 11903 icnt2, $result$$Register,
11900 11904 $vec$$XMMRegister, $tmp$$Register);
11901 11905 }
11902 11906 %}
11903 11907 ins_pipe( pipe_slow );
11904 11908 %}
11905 11909
11906 11910 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11907 11911 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11908 11912 predicate(UseSSE42Intrinsics);
11909 11913 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11910 11914 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11911 11915
11912 11916 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11913 11917 ins_encode %{
11914 11918 __ string_indexof($str1$$Register, $str2$$Register,
11915 11919 $cnt1$$Register, $cnt2$$Register,
11916 11920 (-1), $result$$Register,
11917 11921 $vec$$XMMRegister, $tmp$$Register);
11918 11922 %}
11919 11923 ins_pipe( pipe_slow );
11920 11924 %}
11921 11925
11922 11926 // fast array equals
11923 11927 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11924 11928 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11925 11929 %{
11926 11930 match(Set result (AryEq ary1 ary2));
11927 11931 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11928 11932 //ins_cost(300);
11929 11933
11930 11934 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11931 11935 ins_encode %{
11932 11936 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11933 11937 $tmp3$$Register, $result$$Register, $tmp4$$Register,
11934 11938 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11935 11939 %}
11936 11940 ins_pipe( pipe_slow );
11937 11941 %}
11938 11942
11939 11943 //----------Control Flow Instructions------------------------------------------
11940 11944 // Signed compare Instructions
11941 11945 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
11942 11946 match(Set cr (CmpI op1 op2));
11943 11947 effect( DEF cr, USE op1, USE op2 );
11944 11948 format %{ "CMP $op1,$op2" %}
11945 11949 opcode(0x3B); /* Opcode 3B /r */
11946 11950 ins_encode( OpcP, RegReg( op1, op2) );
11947 11951 ins_pipe( ialu_cr_reg_reg );
11948 11952 %}
11949 11953
11950 11954 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
11951 11955 match(Set cr (CmpI op1 op2));
11952 11956 effect( DEF cr, USE op1 );
11953 11957 format %{ "CMP $op1,$op2" %}
11954 11958 opcode(0x81,0x07); /* Opcode 81 /7 */
11955 11959 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
11956 11960 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11957 11961 ins_pipe( ialu_cr_reg_imm );
11958 11962 %}
11959 11963
11960 11964 // Cisc-spilled version of cmpI_eReg
11961 11965 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
11962 11966 match(Set cr (CmpI op1 (LoadI op2)));
11963 11967
11964 11968 format %{ "CMP $op1,$op2" %}
11965 11969 ins_cost(500);
11966 11970 opcode(0x3B); /* Opcode 3B /r */
11967 11971 ins_encode( OpcP, RegMem( op1, op2) );
11968 11972 ins_pipe( ialu_cr_reg_mem );
11969 11973 %}
11970 11974
11971 11975 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
11972 11976 match(Set cr (CmpI src zero));
11973 11977 effect( DEF cr, USE src );
11974 11978
11975 11979 format %{ "TEST $src,$src" %}
11976 11980 opcode(0x85);
11977 11981 ins_encode( OpcP, RegReg( src, src ) );
11978 11982 ins_pipe( ialu_cr_reg_imm );
11979 11983 %}
11980 11984
11981 11985 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
11982 11986 match(Set cr (CmpI (AndI src con) zero));
11983 11987
11984 11988 format %{ "TEST $src,$con" %}
11985 11989 opcode(0xF7,0x00);
11986 11990 ins_encode( OpcP, RegOpc(src), Con32(con) );
11987 11991 ins_pipe( ialu_cr_reg_imm );
11988 11992 %}
11989 11993
11990 11994 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
11991 11995 match(Set cr (CmpI (AndI src mem) zero));
11992 11996
11993 11997 format %{ "TEST $src,$mem" %}
11994 11998 opcode(0x85);
11995 11999 ins_encode( OpcP, RegMem( src, mem ) );
11996 12000 ins_pipe( ialu_cr_reg_mem );
11997 12001 %}
11998 12002
11999 12003 // Unsigned compare Instructions; really, same as signed except they
12000 12004 // produce an eFlagsRegU instead of eFlagsReg.
12001 12005 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12002 12006 match(Set cr (CmpU op1 op2));
12003 12007
12004 12008 format %{ "CMPu $op1,$op2" %}
12005 12009 opcode(0x3B); /* Opcode 3B /r */
12006 12010 ins_encode( OpcP, RegReg( op1, op2) );
12007 12011 ins_pipe( ialu_cr_reg_reg );
12008 12012 %}
12009 12013
12010 12014 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12011 12015 match(Set cr (CmpU op1 op2));
12012 12016
12013 12017 format %{ "CMPu $op1,$op2" %}
12014 12018 opcode(0x81,0x07); /* Opcode 81 /7 */
12015 12019 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12016 12020 ins_pipe( ialu_cr_reg_imm );
12017 12021 %}
12018 12022
12019 12023 // // Cisc-spilled version of cmpU_eReg
12020 12024 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12021 12025 match(Set cr (CmpU op1 (LoadI op2)));
12022 12026
12023 12027 format %{ "CMPu $op1,$op2" %}
12024 12028 ins_cost(500);
12025 12029 opcode(0x3B); /* Opcode 3B /r */
12026 12030 ins_encode( OpcP, RegMem( op1, op2) );
12027 12031 ins_pipe( ialu_cr_reg_mem );
12028 12032 %}
12029 12033
12030 12034 // // Cisc-spilled version of cmpU_eReg
12031 12035 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12032 12036 // match(Set cr (CmpU (LoadI op1) op2));
12033 12037 //
12034 12038 // format %{ "CMPu $op1,$op2" %}
12035 12039 // ins_cost(500);
12036 12040 // opcode(0x39); /* Opcode 39 /r */
12037 12041 // ins_encode( OpcP, RegMem( op1, op2) );
12038 12042 //%}
12039 12043
12040 12044 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12041 12045 match(Set cr (CmpU src zero));
12042 12046
12043 12047 format %{ "TESTu $src,$src" %}
12044 12048 opcode(0x85);
12045 12049 ins_encode( OpcP, RegReg( src, src ) );
12046 12050 ins_pipe( ialu_cr_reg_imm );
12047 12051 %}
12048 12052
12049 12053 // Unsigned pointer compare Instructions
12050 12054 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12051 12055 match(Set cr (CmpP op1 op2));
12052 12056
12053 12057 format %{ "CMPu $op1,$op2" %}
12054 12058 opcode(0x3B); /* Opcode 3B /r */
12055 12059 ins_encode( OpcP, RegReg( op1, op2) );
12056 12060 ins_pipe( ialu_cr_reg_reg );
12057 12061 %}
12058 12062
12059 12063 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12060 12064 match(Set cr (CmpP op1 op2));
12061 12065
12062 12066 format %{ "CMPu $op1,$op2" %}
12063 12067 opcode(0x81,0x07); /* Opcode 81 /7 */
12064 12068 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12065 12069 ins_pipe( ialu_cr_reg_imm );
12066 12070 %}
12067 12071
12068 12072 // // Cisc-spilled version of cmpP_eReg
12069 12073 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12070 12074 match(Set cr (CmpP op1 (LoadP op2)));
12071 12075
12072 12076 format %{ "CMPu $op1,$op2" %}
12073 12077 ins_cost(500);
12074 12078 opcode(0x3B); /* Opcode 3B /r */
12075 12079 ins_encode( OpcP, RegMem( op1, op2) );
12076 12080 ins_pipe( ialu_cr_reg_mem );
12077 12081 %}
12078 12082
12079 12083 // // Cisc-spilled version of cmpP_eReg
12080 12084 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12081 12085 // match(Set cr (CmpP (LoadP op1) op2));
12082 12086 //
12083 12087 // format %{ "CMPu $op1,$op2" %}
12084 12088 // ins_cost(500);
12085 12089 // opcode(0x39); /* Opcode 39 /r */
12086 12090 // ins_encode( OpcP, RegMem( op1, op2) );
12087 12091 //%}
12088 12092
12089 12093 // Compare raw pointer (used in out-of-heap check).
12090 12094 // Only works because non-oop pointers must be raw pointers
12091 12095 // and raw pointers have no anti-dependencies.
12092 12096 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12093 12097 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12094 12098 match(Set cr (CmpP op1 (LoadP op2)));
12095 12099
12096 12100 format %{ "CMPu $op1,$op2" %}
12097 12101 opcode(0x3B); /* Opcode 3B /r */
12098 12102 ins_encode( OpcP, RegMem( op1, op2) );
12099 12103 ins_pipe( ialu_cr_reg_mem );
12100 12104 %}
12101 12105
12102 12106 //
12103 12107 // This will generate a signed flags result. This should be ok
12104 12108 // since any compare to a zero should be eq/neq.
12105 12109 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12106 12110 match(Set cr (CmpP src zero));
12107 12111
12108 12112 format %{ "TEST $src,$src" %}
12109 12113 opcode(0x85);
12110 12114 ins_encode( OpcP, RegReg( src, src ) );
12111 12115 ins_pipe( ialu_cr_reg_imm );
12112 12116 %}
12113 12117
12114 12118 // Cisc-spilled version of testP_reg
12115 12119 // This will generate a signed flags result. This should be ok
12116 12120 // since any compare to a zero should be eq/neq.
12117 12121 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12118 12122 match(Set cr (CmpP (LoadP op) zero));
12119 12123
12120 12124 format %{ "TEST $op,0xFFFFFFFF" %}
12121 12125 ins_cost(500);
12122 12126 opcode(0xF7); /* Opcode F7 /0 */
12123 12127 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12124 12128 ins_pipe( ialu_cr_reg_imm );
12125 12129 %}
12126 12130
12127 12131 // Yanked all unsigned pointer compare operations.
12128 12132 // Pointer compares are done with CmpP which is already unsigned.
12129 12133
12130 12134 //----------Max and Min--------------------------------------------------------
12131 12135 // Min Instructions
12132 12136 ////
12133 12137 // *** Min and Max using the conditional move are slower than the
12134 12138 // *** branch version on a Pentium III.
12135 12139 // // Conditional move for min
12136 12140 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12137 12141 // effect( USE_DEF op2, USE op1, USE cr );
12138 12142 // format %{ "CMOVlt $op2,$op1\t! min" %}
12139 12143 // opcode(0x4C,0x0F);
12140 12144 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12141 12145 // ins_pipe( pipe_cmov_reg );
12142 12146 //%}
12143 12147 //
12144 12148 //// Min Register with Register (P6 version)
12145 12149 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12146 12150 // predicate(VM_Version::supports_cmov() );
12147 12151 // match(Set op2 (MinI op1 op2));
12148 12152 // ins_cost(200);
12149 12153 // expand %{
12150 12154 // eFlagsReg cr;
12151 12155 // compI_eReg(cr,op1,op2);
12152 12156 // cmovI_reg_lt(op2,op1,cr);
12153 12157 // %}
12154 12158 //%}
12155 12159
12156 12160 // Min Register with Register (generic version)
12157 12161 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12158 12162 match(Set dst (MinI dst src));
12159 12163 effect(KILL flags);
12160 12164 ins_cost(300);
12161 12165
12162 12166 format %{ "MIN $dst,$src" %}
12163 12167 opcode(0xCC);
12164 12168 ins_encode( min_enc(dst,src) );
12165 12169 ins_pipe( pipe_slow );
12166 12170 %}
12167 12171
12168 12172 // Max Register with Register
12169 12173 // *** Min and Max using the conditional move are slower than the
12170 12174 // *** branch version on a Pentium III.
12171 12175 // // Conditional move for max
12172 12176 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12173 12177 // effect( USE_DEF op2, USE op1, USE cr );
12174 12178 // format %{ "CMOVgt $op2,$op1\t! max" %}
12175 12179 // opcode(0x4F,0x0F);
12176 12180 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12177 12181 // ins_pipe( pipe_cmov_reg );
12178 12182 //%}
12179 12183 //
12180 12184 // // Max Register with Register (P6 version)
12181 12185 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12182 12186 // predicate(VM_Version::supports_cmov() );
12183 12187 // match(Set op2 (MaxI op1 op2));
12184 12188 // ins_cost(200);
12185 12189 // expand %{
12186 12190 // eFlagsReg cr;
12187 12191 // compI_eReg(cr,op1,op2);
12188 12192 // cmovI_reg_gt(op2,op1,cr);
12189 12193 // %}
12190 12194 //%}
12191 12195
12192 12196 // Max Register with Register (generic version)
12193 12197 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12194 12198 match(Set dst (MaxI dst src));
12195 12199 effect(KILL flags);
12196 12200 ins_cost(300);
12197 12201
12198 12202 format %{ "MAX $dst,$src" %}
12199 12203 opcode(0xCC);
12200 12204 ins_encode( max_enc(dst,src) );
12201 12205 ins_pipe( pipe_slow );
12202 12206 %}
12203 12207
12204 12208 // ============================================================================
12205 12209 // Counted Loop limit node which represents exact final iterator value.
12206 12210 // Note: the resulting value should fit into integer range since
12207 12211 // counted loops have limit check on overflow.
12208 12212 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12209 12213 match(Set limit (LoopLimit (Binary init limit) stride));
12210 12214 effect(TEMP limit_hi, TEMP tmp, KILL flags);
12211 12215 ins_cost(300);
12212 12216
12213 12217 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12214 12218 ins_encode %{
12215 12219 int strd = (int)$stride$$constant;
12216 12220 assert(strd != 1 && strd != -1, "sanity");
12217 12221 int m1 = (strd > 0) ? 1 : -1;
12218 12222 // Convert limit to long (EAX:EDX)
12219 12223 __ cdql();
12220 12224 // Convert init to long (init:tmp)
12221 12225 __ movl($tmp$$Register, $init$$Register);
12222 12226 __ sarl($tmp$$Register, 31);
12223 12227 // $limit - $init
12224 12228 __ subl($limit$$Register, $init$$Register);
12225 12229 __ sbbl($limit_hi$$Register, $tmp$$Register);
12226 12230 // + ($stride - 1)
12227 12231 if (strd > 0) {
12228 12232 __ addl($limit$$Register, (strd - 1));
12229 12233 __ adcl($limit_hi$$Register, 0);
12230 12234 __ movl($tmp$$Register, strd);
12231 12235 } else {
12232 12236 __ addl($limit$$Register, (strd + 1));
12233 12237 __ adcl($limit_hi$$Register, -1);
12234 12238 __ lneg($limit_hi$$Register, $limit$$Register);
12235 12239 __ movl($tmp$$Register, -strd);
12236 12240 }
12237 12241 // signed division: (EAX:EDX) / pos_stride
12238 12242 __ idivl($tmp$$Register);
12239 12243 if (strd < 0) {
12240 12244 // restore sign
12241 12245 __ negl($tmp$$Register);
12242 12246 }
12243 12247 // (EAX) * stride
12244 12248 __ mull($tmp$$Register);
12245 12249 // + init (ignore upper bits)
12246 12250 __ addl($limit$$Register, $init$$Register);
12247 12251 %}
12248 12252 ins_pipe( pipe_slow );
12249 12253 %}
12250 12254
12251 12255 // ============================================================================
12252 12256 // Branch Instructions
12253 12257 // Jump Table
12254 12258 instruct jumpXtnd(eRegI switch_val) %{
12255 12259 match(Jump switch_val);
12256 12260 ins_cost(350);
12257 12261 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12258 12262 ins_encode %{
12259 12263 // Jump to Address(table_base + switch_reg)
12260 12264 Address index(noreg, $switch_val$$Register, Address::times_1);
12261 12265 __ jump(ArrayAddress($constantaddress, index));
12262 12266 %}
12263 12267 ins_pipe(pipe_jmp);
12264 12268 %}
12265 12269
12266 12270 // Jump Direct - Label defines a relative address from JMP+1
12267 12271 instruct jmpDir(label labl) %{
12268 12272 match(Goto);
12269 12273 effect(USE labl);
12270 12274
12271 12275 ins_cost(300);
12272 12276 format %{ "JMP $labl" %}
12273 12277 size(5);
12274 12278 ins_encode %{
12275 12279 Label* L = $labl$$label;
12276 12280 __ jmp(*L, false); // Always long jump
12277 12281 %}
12278 12282 ins_pipe( pipe_jmp );
12279 12283 %}
12280 12284
12281 12285 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12282 12286 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12283 12287 match(If cop cr);
12284 12288 effect(USE labl);
12285 12289
12286 12290 ins_cost(300);
12287 12291 format %{ "J$cop $labl" %}
12288 12292 size(6);
12289 12293 ins_encode %{
12290 12294 Label* L = $labl$$label;
12291 12295 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12292 12296 %}
12293 12297 ins_pipe( pipe_jcc );
12294 12298 %}
12295 12299
12296 12300 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12297 12301 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12298 12302 match(CountedLoopEnd cop cr);
12299 12303 effect(USE labl);
12300 12304
12301 12305 ins_cost(300);
12302 12306 format %{ "J$cop $labl\t# Loop end" %}
12303 12307 size(6);
12304 12308 ins_encode %{
12305 12309 Label* L = $labl$$label;
12306 12310 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12307 12311 %}
12308 12312 ins_pipe( pipe_jcc );
12309 12313 %}
12310 12314
12311 12315 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12312 12316 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12313 12317 match(CountedLoopEnd cop cmp);
12314 12318 effect(USE labl);
12315 12319
12316 12320 ins_cost(300);
12317 12321 format %{ "J$cop,u $labl\t# Loop end" %}
12318 12322 size(6);
12319 12323 ins_encode %{
12320 12324 Label* L = $labl$$label;
12321 12325 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12322 12326 %}
12323 12327 ins_pipe( pipe_jcc );
12324 12328 %}
12325 12329
12326 12330 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12327 12331 match(CountedLoopEnd cop cmp);
12328 12332 effect(USE labl);
12329 12333
12330 12334 ins_cost(200);
12331 12335 format %{ "J$cop,u $labl\t# Loop end" %}
12332 12336 size(6);
12333 12337 ins_encode %{
12334 12338 Label* L = $labl$$label;
12335 12339 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12336 12340 %}
12337 12341 ins_pipe( pipe_jcc );
12338 12342 %}
12339 12343
12340 12344 // Jump Direct Conditional - using unsigned comparison
12341 12345 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12342 12346 match(If cop cmp);
12343 12347 effect(USE labl);
12344 12348
12345 12349 ins_cost(300);
12346 12350 format %{ "J$cop,u $labl" %}
12347 12351 size(6);
12348 12352 ins_encode %{
12349 12353 Label* L = $labl$$label;
12350 12354 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12351 12355 %}
12352 12356 ins_pipe(pipe_jcc);
12353 12357 %}
12354 12358
12355 12359 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12356 12360 match(If cop cmp);
12357 12361 effect(USE labl);
12358 12362
12359 12363 ins_cost(200);
12360 12364 format %{ "J$cop,u $labl" %}
12361 12365 size(6);
12362 12366 ins_encode %{
12363 12367 Label* L = $labl$$label;
12364 12368 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12365 12369 %}
12366 12370 ins_pipe(pipe_jcc);
12367 12371 %}
12368 12372
12369 12373 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12370 12374 match(If cop cmp);
12371 12375 effect(USE labl);
12372 12376
12373 12377 ins_cost(200);
12374 12378 format %{ $$template
12375 12379 if ($cop$$cmpcode == Assembler::notEqual) {
12376 12380 $$emit$$"JP,u $labl\n\t"
12377 12381 $$emit$$"J$cop,u $labl"
12378 12382 } else {
12379 12383 $$emit$$"JP,u done\n\t"
12380 12384 $$emit$$"J$cop,u $labl\n\t"
12381 12385 $$emit$$"done:"
12382 12386 }
12383 12387 %}
12384 12388 ins_encode %{
12385 12389 Label* l = $labl$$label;
12386 12390 if ($cop$$cmpcode == Assembler::notEqual) {
12387 12391 __ jcc(Assembler::parity, *l, false);
12388 12392 __ jcc(Assembler::notEqual, *l, false);
12389 12393 } else if ($cop$$cmpcode == Assembler::equal) {
12390 12394 Label done;
12391 12395 __ jccb(Assembler::parity, done);
12392 12396 __ jcc(Assembler::equal, *l, false);
12393 12397 __ bind(done);
12394 12398 } else {
12395 12399 ShouldNotReachHere();
12396 12400 }
12397 12401 %}
12398 12402 ins_pipe(pipe_jcc);
12399 12403 %}
12400 12404
12401 12405 // ============================================================================
12402 12406 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12403 12407 // array for an instance of the superklass. Set a hidden internal cache on a
12404 12408 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12405 12409 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12406 12410 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12407 12411   match(Set result (PartialSubtypeCheck sub super));
12408 12412   effect( KILL rcx, KILL cr );
12409 12413 
12410 12414   ins_cost(1100); // slightly larger than the next version
12411 12415   format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12412 12416             "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12413 12417             "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12414 12418             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12415 12419             "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
12416 12420             "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12417 12421             "XOR $result,$result\t\t Hit: EDI zero\n\t"
12418 12422             "miss:\t" %}
12419 12423 
            // Per the comments here and in the _vs_Zero variant below, the opcode value
            // acts as a flag to enc_PartialSubtypeCheck: 0x1 = also emit "XOR EDI,EDI"
            // so $result is zero on a hit (callers read the value, not just flags).
12420 12424   opcode(0x1); // Force a XOR of EDI
12421 12425   ins_encode( enc_PartialSubtypeCheck() );
12422 12426   ins_pipe( pipe_slow );
12423 12427 %}
12424 12428 
            // Flags-only variant: matches (CmpP (PartialSubtypeCheck ...) 0) directly,
            // so only the Z/NZ flag outcome is needed and the final XOR of EDI can be
            // skipped (opcode 0x0 below).  EDI is still clobbered, hence KILL result.
12425 12429 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12426 12430   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12427 12431   effect( KILL rcx, KILL result );
12428 12432 
12429 12433   ins_cost(1000);
12430 12434   format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12431 12435             "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12432 12436             "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12433 12437             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12434 12438             "JNE,s miss\t\t# Missed: flags NZ\n\t"
12435 12439             "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12436 12440             "miss:\t" %}
12437 12441 
12438 12442   opcode(0x0); // No need to XOR EDI
12439 12443   ins_encode( enc_PartialSubtypeCheck() );
12440 12444   ins_pipe( pipe_slow );
12441 12445 %}
12442 12446
12443 12447 // ============================================================================
12444 12448 // Branch Instructions -- short offset versions
12445 12449 //
12446 12450 // These instructions are used to replace jumps of a long offset (the default
12447 12451 // match) with jumps of a shorter offset. These instructions are all tagged
12448 12452 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12449 12453 // match rules in general matching. Instead, the ADLC generates a conversion
12450 12454 // method in the MachNode which can be used to do in-place replacement of the
12451 12455 // long variant with the shorter variant. The compiler will determine if a
12452 12456 // branch can be taken by the is_short_branch_offset() predicate in the machine
12453 12457 // specific code section of the file.
12454 12458
12455 12459 // Jump Direct - Label defines a relative address from JMP+1
12456 12460 instruct jmpDir_short(label labl) %{
12457 12461   match(Goto);
12458 12462   effect(USE labl);
12459 12463 
12460 12464   ins_cost(300);
12461 12465   format %{ "JMP,s $labl" %}
            // size(2) must stay in sync with the bytes jmpb() emits (opcode + rel8);
            // the ",s" suffix in the format marks the short form.
12462 12466   size(2);
12463 12467   ins_encode %{
12464 12468     Label* L = $labl$$label;
12465 12469     __ jmpb(*L);
12466 12470   %}
12467 12471   ins_pipe( pipe_jmp );
            // ins_short_branch(1): ADLC uses this as the in-place replacement for the
            // long-offset jmpDir when is_short_branch_offset() holds (see header above).
12468 12472   ins_short_branch(1);
12469 12473 %}
12470 12474 
12471 12475 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12472 12476 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12473 12477   match(If cop cr);
12474 12478   effect(USE labl);
12475 12479 
12476 12480   ins_cost(300);
12477 12481   format %{ "J$cop,s $labl" %}
12478 12482   size(2);
12479 12483   ins_encode %{
12480 12484     Label* L = $labl$$label;
12481 12485     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12482 12486   %}
12483 12487   ins_pipe( pipe_jcc );
12484 12488   ins_short_branch(1);
12485 12489 %}
12486 12490 
12487 12491 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12488 12492 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12489 12493   match(CountedLoopEnd cop cr);
12490 12494   effect(USE labl);
12491 12495 
12492 12496   ins_cost(300);
12493 12497   format %{ "J$cop,s $labl\t# Loop end" %}
12494 12498   size(2);
12495 12499   ins_encode %{
12496 12500     Label* L = $labl$$label;
12497 12501     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12498 12502   %}
12499 12503   ins_pipe( pipe_jcc );
12500 12504   ins_short_branch(1);
12501 12505 %}
12502 12506 
12503 12507 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12504 12508 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12505 12509   match(CountedLoopEnd cop cmp);
12506 12510   effect(USE labl);
12507 12511 
12508 12512   ins_cost(300);
12509 12513   format %{ "J$cop,us $labl\t# Loop end" %}
12510 12514   size(2);
12511 12515   ins_encode %{
12512 12516     Label* L = $labl$$label;
12513 12517     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12514 12518   %}
12515 12519   ins_pipe( pipe_jcc );
12516 12520   ins_short_branch(1);
12517 12521 %}
12518 12522 
12519 12523 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12520 12524   match(CountedLoopEnd cop cmp);
12521 12525   effect(USE labl);
12522 12526 
12523 12527   ins_cost(300);
12524 12528   format %{ "J$cop,us $labl\t# Loop end" %}
12525 12529   size(2);
12526 12530   ins_encode %{
12527 12531     Label* L = $labl$$label;
12528 12532     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12529 12533   %}
12530 12534   ins_pipe( pipe_jcc );
12531 12535   ins_short_branch(1);
12532 12536 %}
12533 12537 
12534 12538 // Jump Direct Conditional - using unsigned comparison
12535 12539 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12536 12540   match(If cop cmp);
12537 12541   effect(USE labl);
12538 12542 
12539 12543   ins_cost(300);
12540 12544   format %{ "J$cop,us $labl" %}
12541 12545   size(2);
12542 12546   ins_encode %{
12543 12547     Label* L = $labl$$label;
12544 12548     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12545 12549   %}
12546 12550   ins_pipe( pipe_jcc );
12547 12551   ins_short_branch(1);
12548 12552 %}
12549 12553 
12550 12554 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12551 12555   match(If cop cmp);
12552 12556   effect(USE labl);
12553 12557 
12554 12558   ins_cost(300);
12555 12559   format %{ "J$cop,us $labl" %}
12556 12560   size(2);
12557 12561   ins_encode %{
12558 12562     Label* L = $labl$$label;
12559 12563     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12560 12564   %}
12561 12565   ins_pipe( pipe_jcc );
12562 12566   ins_short_branch(1);
12563 12567 %}
12564 12568 
            // cmpOpUCF2 form: the parity flag (set by an unordered FP compare) must be
            // folded into eq/ne.  NE: jump to target on P or on NE.  EQ: jump past on P,
            // then take EQ.  Two 2-byte jccb's, hence size(4).
12565 12569 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12566 12570   match(If cop cmp);
12567 12571   effect(USE labl);
12568 12572 
12569 12573   ins_cost(300);
12570 12574   format %{ $$template
12571 12575     if ($cop$$cmpcode == Assembler::notEqual) {
12572 12576       $$emit$$"JP,u,s $labl\n\t"
12573 12577       $$emit$$"J$cop,u,s $labl"
12574 12578     } else {
12575 12579       $$emit$$"JP,u,s done\n\t"
12576 12580       $$emit$$"J$cop,u,s $labl\n\t"
12577 12581       $$emit$$"done:"
12578 12582     }
12579 12583   %}
12580 12584   size(4);
12581 12585   ins_encode %{
12582 12586     Label* l = $labl$$label;
12583 12587     if ($cop$$cmpcode == Assembler::notEqual) {
12584 12588       __ jccb(Assembler::parity, *l);
12585 12589       __ jccb(Assembler::notEqual, *l);
12586 12590     } else if ($cop$$cmpcode == Assembler::equal) {
12587 12591       Label done;
12588 12592       __ jccb(Assembler::parity, done);
12589 12593       __ jccb(Assembler::equal, *l);
12590 12594       __ bind(done);
12591 12595     } else {
            // cmpOpUCF2 only ever carries eq or ne here.
12592 12596       ShouldNotReachHere();
12593 12597     }
12594 12598   %}
12595 12599   ins_pipe(pipe_jcc);
12596 12600   ins_short_branch(1);
12597 12601 %}
12598 12602
12599 12603 // ============================================================================
12600 12604 // Long Compare
12601 12605 //
12602 12606 // Currently we hold longs in 2 registers. Comparing such values efficiently
12603 12607 // is tricky. The flavor of compare used depends on whether we are testing
12604 12608 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12605 12609 // The GE test is the negated LT test. The LE test can be had by commuting
12606 12610 // the operands (yielding a GE test) and then negating; negate again for the
12607 12611 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12608 12612 // NE test is negated from that.
12609 12613
12610 12614 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12611 12615 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12612 12616 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12613 12617 // are collapsed internally in the ADLC's dfa-gen code. The match for
12614 12618 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12615 12619 // foo match ends up with the wrong leaf. One fix is to not match both
12616 12620 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12617 12621 // both forms beat the trinary form of long-compare and both are very useful
12618 12622 // on Intel which has so few registers.
12619 12623
12620 12624 // Manifest a CmpL result in an integer register. Very painful.
12621 12625 // This is the test to avoid.
12622 12626 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12623 12627   match(Set dst (CmpL3 src1 src2));
12624 12628   effect( KILL flags );
12625 12629   ins_cost(1000);
12626 12630   format %{ "XOR $dst,$dst\n\t"
12627 12631             "CMP $src1.hi,$src2.hi\n\t"
12628 12632             "JLT,s m_one\n\t"
12629 12633             "JGT,s p_one\n\t"
12630 12634             "CMP $src1.lo,$src2.lo\n\t"
12631 12635             "JB,s m_one\n\t"
12632 12636             "JEQ,s done\n"
12633 12637             "p_one:\tINC $dst\n\t"
12634 12638             "JMP,s done\n"
12635 12639             "m_one:\tDEC $dst\n"
12636 12640             "done:" %}
12637 12641   ins_encode %{
12638 12642     Label p_one, m_one, done;
            // dst starts at 0 (the "equal" answer); branches adjust it to -1/+1.
12639 12643     __ xorptr($dst$$Register, $dst$$Register);
            // High words decide with SIGNED tests (less/greater) — only the top half
            // carries the sign of the two-word long; low words use the UNSIGNED test
            // (below) once the high words are known equal.
12640 12644     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12641 12645     __ jccb(Assembler::less, m_one);
12642 12646     __ jccb(Assembler::greater, p_one);
12643 12647     __ cmpl($src1$$Register, $src2$$Register);
12644 12648     __ jccb(Assembler::below, m_one);
12645 12649     __ jccb(Assembler::equal, done);
            // Fall-through: hi words equal, lo1 above lo2 => result +1.
12646 12650     __ bind(p_one);
12647 12651     __ incrementl($dst$$Register);
12648 12652     __ jmpb(done);
12649 12653     __ bind(m_one);
12650 12654     __ decrementl($dst$$Register);
12651 12655     __ bind(done);
12652 12656   %}
12653 12657   ins_pipe( pipe_slow );
12654 12658 %}
12655 12659
12656 12660 //======
12657 12661 // Manifest a CmpL result in the normal flags. Only good for LT or GE
12658 12662 // compares. Can be used for LE or GT compares by reversing arguments.
12659 12663 // NOT GOOD FOR EQ/NE tests.
12660 12664 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12661 12665   match( Set flags (CmpL src zero ));
12662 12666   ins_cost(100);
            // The sign of a two-word long vs. zero lives entirely in the high word,
            // so testing src.hi alone suffices for the LT/GE flag class.
12663 12667   format %{ "TEST $src.hi,$src.hi" %}
12664 12668   opcode(0x85);
12665 12669   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12666 12670   ins_pipe( ialu_cr_reg_reg );
12667 12671 %}
12668 12672 
12669 12673 // Manifest a CmpL result in the normal flags. Only good for LT or GE
12670 12674 // compares. Can be used for LE or GT compares by reversing arguments.
12671 12675 // NOT GOOD FOR EQ/NE tests.
12672 12676 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
12673 12677   match( Set flags (CmpL src1 src2 ));
            // tmp holds src1.hi for the borrowing SBB and is clobbered, hence TEMP.
12674 12678   effect( TEMP tmp );
12675 12679   ins_cost(300);
12676 12680   format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12677 12681             "MOV $tmp,$src1.hi\n\t"
12678 12682             "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
12679 12683   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12680 12684   ins_pipe( ialu_cr_reg_reg );
12681 12685 %}
12682 12686 
12683 12687 // Long compares reg < zero/req OR reg >= zero/req.
12684 12688 // Just a wrapper for a normal branch, plus the predicate test.
12685 12689 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12686 12690   match(If cmp flags);
12687 12691   effect(USE labl);
            // Restrict this rule to the BoolTest values the LTGE flag encoding supports.
12688 12692   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12689 12693   expand %{
12690 12694     jmpCon(cmp,flags,labl); // JLT or JGE...
12691 12695   %}
12692 12696 %}
12693 12697 
12694 12698 // Compare 2 longs and CMOVE longs.
12695 12699 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12696 12700   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12697 12701   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12698 12702   ins_cost(400);
            // Two CMOVcc's, one per 32-bit half of the long.
12699 12703   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12700 12704             "CMOV$cmp $dst.hi,$src.hi" %}
12701 12705   opcode(0x0F,0x40);
12702 12706   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12703 12707   ins_pipe( pipe_cmov_reg_long );
12704 12708 %}
12705 12709 
12706 12710 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12707 12711   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12708 12712   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12709 12713   ins_cost(500);
12710 12714   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12711 12715             "CMOV$cmp $dst.hi,$src.hi" %}
12712 12716   opcode(0x0F,0x40);
12713 12717   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12714 12718   ins_pipe( pipe_cmov_reg_long );
12715 12719 %}
12716 12720 
12717 12721 // Compare 2 longs and CMOVE ints.
12718 12722 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
12719 12723   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12720 12724   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12721 12725   ins_cost(200);
12722 12726   format %{ "CMOV$cmp $dst,$src" %}
12723 12727   opcode(0x0F,0x40);
12724 12728   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12725 12729   ins_pipe( pipe_cmov_reg );
12726 12730 %}
12727 12731 
12728 12732 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
12729 12733   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12730 12734   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12731 12735   ins_cost(250);
12732 12736   format %{ "CMOV$cmp $dst,$src" %}
12733 12737   opcode(0x0F,0x40);
12734 12738   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12735 12739   ins_pipe( pipe_cmov_mem );
12736 12740 %}
12737 12741 
12738 12742 // Compare 2 longs and CMOVE pointers.
12739 12743 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12740 12744   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12741 12745   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12742 12746   ins_cost(200);
12743 12747   format %{ "CMOV$cmp $dst,$src" %}
12744 12748   opcode(0x0F,0x40);
12745 12749   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12746 12750   ins_pipe( pipe_cmov_reg );
12747 12751 %}
12748 12752
12749 12753 // Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1)
12750 12754 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
            // FIX: parenthesize the BoolTest disjunction so the UseSSE guard applies to
            // BOTH alternatives.  Previously "UseSSE<=1 && lt || ge" parsed as
            // "(UseSSE<=1 && lt) || ge" — the 'ge' half matched regardless of UseSSE.
            // Matches the parenthesized style of cmovLL/II/PP_*_LTGE above.
12751 12755   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12752 12756   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12753 12757   ins_cost(200);
12754 12758   expand %{
12755 12759     fcmovDPR_regS(cmp,flags,dst,src);
12756 12760   %}
12757 12761 %}
12758 12762 
12759 12763 // Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2)
12760 12764 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
            // Same precedence fix as cmovDDPR_reg_LTGE above.
12761 12765   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12762 12766   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12763 12767   ins_cost(200);
12764 12768   expand %{
12765 12769     fcmovD_regS(cmp,flags,dst,src);
12766 12770   %}
12767 12771 %}
12768 12772 
12769 12773 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
            // Same precedence fix as cmovDDPR_reg_LTGE above.
12770 12774   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12771 12775   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12772 12776   ins_cost(200);
12773 12777   expand %{
12774 12778     fcmovFPR_regS(cmp,flags,dst,src);
12775 12779   %}
12776 12780 %}
12777 12781 
12778 12782 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
            // Same precedence fix as cmovDDPR_reg_LTGE above.
12779 12783   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12780 12784   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12781 12785   ins_cost(200);
12782 12786   expand %{
12783 12787     fcmovF_regS(cmp,flags,dst,src);
12784 12788   %}
12785 12789 %}
12786 12790
12787 12791 //======
12788 12792 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
12789 12793 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
12790 12794   match( Set flags (CmpL src zero ));
            // tmp receives src.lo and is OR'ed with src.hi (clobbered, hence TEMP);
            // a long is zero iff the OR of its two halves is zero.
12791 12795   effect(TEMP tmp);
12792 12796   ins_cost(200);
12793 12797   format %{ "MOV $tmp,$src.lo\n\t"
12794 12798             "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12795 12799   ins_encode( long_cmp_flags0( src, tmp ) );
12796 12800   ins_pipe( ialu_reg_reg_long );
12797 12801 %}
12798 12802 
12799 12803 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
12800 12804 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12801 12805   match( Set flags (CmpL src1 src2 ));
12802 12806   ins_cost(200+300);
            // Low halves compared first; only if they are equal are the high halves
            // compared — either way Z/NZ is correct for EQ/NE.
12803 12807   format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12804 12808             "JNE,s skip\n\t"
12805 12809             "CMP $src1.hi,$src2.hi\n\t"
12806 12810             "skip:\t" %}
12807 12811   ins_encode( long_cmp_flags1( src1, src2 ) );
12808 12812   ins_pipe( ialu_cr_reg_reg );
12809 12813 %}
12810 12814 
12811 12815 // Long compare reg == zero/reg OR reg != zero/reg
12812 12816 // Just a wrapper for a normal branch, plus the predicate test.
12813 12817 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12814 12818   match(If cmp flags);
12815 12819   effect(USE labl);
            // Restrict this rule to the BoolTest values the EQNE flag encoding supports.
12816 12820   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12817 12821   expand %{
12818 12822     jmpCon(cmp,flags,labl); // JEQ or JNE...
12819 12823   %}
12820 12824 %}
12821 12825 
12822 12826 // Compare 2 longs and CMOVE longs.
12823 12827 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12824 12828   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12825 12829   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12826 12830   ins_cost(400);
            // Two CMOVcc's, one per 32-bit half of the long.
12827 12831   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12828 12832             "CMOV$cmp $dst.hi,$src.hi" %}
12829 12833   opcode(0x0F,0x40);
12830 12834   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12831 12835   ins_pipe( pipe_cmov_reg_long );
12832 12836 %}
12833 12837 
12834 12838 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12835 12839   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12836 12840   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12837 12841   ins_cost(500);
12838 12842   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12839 12843             "CMOV$cmp $dst.hi,$src.hi" %}
12840 12844   opcode(0x0F,0x40);
12841 12845   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12842 12846   ins_pipe( pipe_cmov_reg_long );
12843 12847 %}
12844 12848 
12845 12849 // Compare 2 longs and CMOVE ints.
12846 12850 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
12847 12851   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12848 12852   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12849 12853   ins_cost(200);
12850 12854   format %{ "CMOV$cmp $dst,$src" %}
12851 12855   opcode(0x0F,0x40);
12852 12856   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12853 12857   ins_pipe( pipe_cmov_reg );
12854 12858 %}
12855 12859 
12856 12860 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
12857 12861   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12858 12862   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12859 12863   ins_cost(250);
12860 12864   format %{ "CMOV$cmp $dst,$src" %}
12861 12865   opcode(0x0F,0x40);
12862 12866   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12863 12867   ins_pipe( pipe_cmov_mem );
12864 12868 %}
12865 12869 
12866 12870 // Compare 2 longs and CMOVE pointers.
12867 12871 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12868 12872   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12869 12873   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12870 12874   ins_cost(200);
12871 12875   format %{ "CMOV$cmp $dst,$src" %}
12872 12876   opcode(0x0F,0x40);
12873 12877   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12874 12878   ins_pipe( pipe_cmov_reg );
12875 12879 %}
12876 12880
12877 12881 // Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1)
12878 12882 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
            // FIX: parenthesize the BoolTest disjunction so the UseSSE guard applies to
            // BOTH alternatives.  Previously "UseSSE<=1 && eq || ne" parsed as
            // "(UseSSE<=1 && eq) || ne" — the 'ne' half matched regardless of UseSSE.
            // Matches the parenthesized style of cmovLL/II/PP_*_EQNE above.
12879 12883   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12880 12884   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12881 12885   ins_cost(200);
12882 12886   expand %{
12883 12887     fcmovDPR_regS(cmp,flags,dst,src);
12884 12888   %}
12885 12889 %}
12886 12890 
12887 12891 // Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2)
12888 12892 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
            // Same precedence fix as cmovDDPR_reg_EQNE above.
12889 12893   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12890 12894   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12891 12895   ins_cost(200);
12892 12896   expand %{
12893 12897     fcmovD_regS(cmp,flags,dst,src);
12894 12898   %}
12895 12899 %}
12896 12900 
12897 12901 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
            // Same precedence fix as cmovDDPR_reg_EQNE above.
12898 12902   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12899 12903   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12900 12904   ins_cost(200);
12901 12905   expand %{
12902 12906     fcmovFPR_regS(cmp,flags,dst,src);
12903 12907   %}
12904 12908 %}
12905 12909 
12906 12910 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
            // Same precedence fix as cmovDDPR_reg_EQNE above.
12907 12911   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12908 12912   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12909 12913   ins_cost(200);
12910 12914   expand %{
12911 12915     fcmovF_regS(cmp,flags,dst,src);
12912 12916   %}
12913 12917 %}
12914 12918
12915 12919 //======
12916 12920 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
12917 12921 // Same as cmpL_reg_flags_LEGT except must negate src
12918 12922 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
12919 12923   match( Set flags (CmpL src zero ));
12920 12924   effect( TEMP tmp );
12921 12925   ins_cost(300);
            // Computes 0 - src through tmp (CMP then borrowing SBB): the flags reflect
            // the negated value, so users must apply the commuted test (see header).
12922 12926   format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12923 12927             "CMP $tmp,$src.lo\n\t"
12924 12928             "SBB $tmp,$src.hi\n\t" %}
12925 12929   ins_encode( long_cmp_flags3(src, tmp) );
12926 12930   ins_pipe( ialu_reg_reg_long );
12927 12931 %}
12928 12932 
12929 12933 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
12930 12934 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
12931 12935 // requires a commuted test to get the same result.
12932 12936 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
12933 12937   match( Set flags (CmpL src1 src2 ));
            // tmp holds src2.hi for the borrowing SBB and is clobbered, hence TEMP.
12934 12938   effect( TEMP tmp );
12935 12939   ins_cost(300);
12936 12940   format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12937 12941             "MOV $tmp,$src2.hi\n\t"
12938 12942             "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
            // Note src2/src1 order: operands are swapped relative to the LTGE rule.
12939 12943   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12940 12944   ins_pipe( ialu_cr_reg_reg );
12941 12945 %}
12942 12946 
12943 12947 // Long compares reg < zero/req OR reg >= zero/req.
12944 12948 // Just a wrapper for a normal branch, plus the predicate test
12945 12949 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12946 12950   match(If cmp flags);
12947 12951   effect(USE labl);
            // Restrict this rule to the BoolTest values the LEGT flag encoding supports.
12948 12952   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12949 12953   ins_cost(300);
12950 12954   expand %{
12951 12955     jmpCon(cmp,flags,labl); // JGT or JLE...
12952 12956   %}
12953 12957 %}
12954 12958 
12955 12959 // Compare 2 longs and CMOVE longs.
12956 12960 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12957 12961   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12958 12962   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12959 12963   ins_cost(400);
12960 12964   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12961 12965             "CMOV$cmp $dst.hi,$src.hi" %}
12962 12966   opcode(0x0F,0x40);
12963 12967   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12964 12968   ins_pipe( pipe_cmov_reg_long );
12965 12969 %}
12966 12970 
12967 12971 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12968 12972   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12969 12973   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12970 12974   ins_cost(500);
12971 12975   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            // NOTE(review): the LTGE/EQNE mem variants print "$src.hi" here; the "+4" is
            // a debug-format inconsistency only (presumably the hi word at src+4) — the
            // emitted code uses the same RegMem_Hi encoding as the siblings.
12972 12976             "CMOV$cmp $dst.hi,$src.hi+4" %}
12973 12977   opcode(0x0F,0x40);
12974 12978   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12975 12979   ins_pipe( pipe_cmov_reg_long );
12976 12980 %}
12977 12981 
12978 12982 // Compare 2 longs and CMOVE ints.
12979 12983 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
12980 12984   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12981 12985   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12982 12986   ins_cost(200);
12983 12987   format %{ "CMOV$cmp $dst,$src" %}
12984 12988   opcode(0x0F,0x40);
12985 12989   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12986 12990   ins_pipe( pipe_cmov_reg );
12987 12991 %}
12988 12992 
12989 12993 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
12990 12994   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12991 12995   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12992 12996   ins_cost(250);
12993 12997   format %{ "CMOV$cmp $dst,$src" %}
12994 12998   opcode(0x0F,0x40);
12995 12999   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12996 13000   ins_pipe( pipe_cmov_mem );
12997 13001 %}
12998 13002 
12999 13003 // Compare 2 longs and CMOVE ptrs.
13000 13004 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13001 13005   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13002 13006   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13003 13007   ins_cost(200);
13004 13008   format %{ "CMOV$cmp $dst,$src" %}
13005 13009   opcode(0x0F,0x40);
13006 13010   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13007 13011   ins_pipe( pipe_cmov_reg );
13008 13012 %}
13009 13013
13010 13014 // Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1)
13011 13015 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
            // FIX: parenthesize the BoolTest disjunction so the UseSSE guard applies to
            // BOTH alternatives.  Previously "UseSSE<=1 && le || gt" parsed as
            // "(UseSSE<=1 && le) || gt" — the 'gt' half matched regardless of UseSSE.
            // Matches the parenthesized style of cmovLL/II/PP_*_LEGT above.
13012 13016   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13013 13017   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13014 13018   ins_cost(200);
13015 13019   expand %{
13016 13020     fcmovDPR_regS(cmp,flags,dst,src);
13017 13021   %}
13018 13022 %}
13019 13023 
13020 13024 // Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2)
13021 13025 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
            // Same precedence fix as cmovDDPR_reg_LEGT above.
13022 13026   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13023 13027   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13024 13028   ins_cost(200);
13025 13029   expand %{
13026 13030     fcmovD_regS(cmp,flags,dst,src);
13027 13031   %}
13028 13032 %}
13029 13033 
13030 13034 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
            // Same precedence fix as cmovDDPR_reg_LEGT above.
13031 13035   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13032 13036   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13033 13037   ins_cost(200);
13034 13038   expand %{
13035 13039     fcmovFPR_regS(cmp,flags,dst,src);
13036 13040   %}
13037 13041 %}
13038 13042 
13039 13043 
13040 13044 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
            // Same precedence fix as cmovDDPR_reg_LEGT above.
13041 13045   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13042 13046   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13043 13047   ins_cost(200);
13044 13048   expand %{
13045 13049     fcmovF_regS(cmp,flags,dst,src);
13046 13050   %}
13047 13051 %}
13048 13052
13049 13053
13050 13054 // ============================================================================
13051 13055 // Procedure Call/Return Instructions
13052 13056 // Call Java Static Instruction
13053 13057 // Note: If this code changes, the corresponding ret_addr_offset() and
13054 13058 // compute_padding() functions will have to be adjusted.
13055 13059 instruct CallStaticJavaDirect(method meth) %{
// Direct static Java call: opcode 0xE8 is CALL rel32.  The predicate excludes
// method-handle invokes, which CallStaticJavaHandle handles instead.
// pre_call_FPU/post_call_FPU bracket the call to manage x87 state around it.
13056 13060 match(CallStaticJava);
13057 13061 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13058 13062 effect(USE meth);
13059 13063
13060 13064 ins_cost(300);
13061 13065 format %{ "CALL,static " %}
13062 13066 opcode(0xE8); /* E8 cd */
13063 13067 ins_encode( pre_call_FPU,
13064 13068 Java_Static_Call( meth ),
13065 13069 call_epilog,
13066 13070 post_call_FPU );
13067 13071 ins_pipe( pipe_slow );
// 4-byte alignment so ret_addr_offset()/compute_padding() stay valid
// (see the note above this instruct).
13068 13072 ins_alignment(4);
13069 13073 %}
13070 13074
13071 13075 // Call Java Static Instruction (method handle version)
13072 13076 // Note: If this code changes, the corresponding ret_addr_offset() and
13073 13077 // compute_padding() functions will have to be adjusted.
13074 13078 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
// Method-handle variant of the static call: same CALL rel32 encoding, but the
// call is wrapped in preserve_SP/restore_SP (saving SP in EBP, per the
// ebp_mh_SP_save operand) because method-handle invokes may adjust SP.
13075 13079 match(CallStaticJava);
13076 13080 predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13077 13081 effect(USE meth);
13078 13082 // EBP is saved by all callees (for interpreter stack correction).
13079 13083 // We use it here for a similar purpose, in {preserve,restore}_SP.
13080 13084
13081 13085 ins_cost(300);
13082 13086 format %{ "CALL,static/MethodHandle " %}
13083 13087 opcode(0xE8); /* E8 cd */
13084 13088 ins_encode( pre_call_FPU,
13085 13089 preserve_SP,
13086 13090 Java_Static_Call( meth ),
13087 13091 restore_SP,
13088 13092 call_epilog,
13089 13093 post_call_FPU );
13090 13094 ins_pipe( pipe_slow );
13091 13095 ins_alignment(4);
13092 13096 %}
13093 13097
13094 13098 // Call Java Dynamic Instruction
13095 13099 // Note: If this code changes, the corresponding ret_addr_offset() and
13096 13100 // compute_padding() functions will have to be adjusted.
13097 13101 instruct CallDynamicJavaDirect(method meth) %{
// Dynamic (virtual/interface) Java call.  Per the format string, EAX is first
// loaded with a placeholder oop (-1) before the CALL — presumably the
// inline-cache slot filled in by Java_Dynamic_Call; confirm in that encoding.
13098 13102 match(CallDynamicJava);
13099 13103 effect(USE meth);
13100 13104
13101 13105 ins_cost(300);
13102 13106 format %{ "MOV EAX,(oop)-1\n\t"
13103 13107 "CALL,dynamic" %}
13104 13108 opcode(0xE8); /* E8 cd */
13105 13109 ins_encode( pre_call_FPU,
13106 13110 Java_Dynamic_Call( meth ),
13107 13111 call_epilog,
13108 13112 post_call_FPU );
13109 13113 ins_pipe( pipe_slow );
13110 13114 ins_alignment(4);
13111 13115 %}
13112 13116
13113 13117 // Call Runtime Instruction
13114 13118 instruct CallRuntimeDirect(method meth) %{
// Call into the VM runtime.  The x87 stack is emptied first
// (FFree_Float_Stack_All) before transferring to native code.
13115 13119 match(CallRuntime );
13116 13120 effect(USE meth);
13117 13121
13118 13122 ins_cost(300);
13119 13123 format %{ "CALL,runtime " %}
13120 13124 opcode(0xE8); /* E8 cd */
13121 13125 // Use FFREEs to clear entries in float stack
13122 13126 ins_encode( pre_call_FPU,
13123 13127 FFree_Float_Stack_All,
13124 13128 Java_To_Runtime( meth ),
13125 13129 post_call_FPU );
13126 13130 ins_pipe( pipe_slow );
13127 13131 %}
13128 13132
13129 13133 // Call runtime without safepoint
13130 13134 instruct CallLeafDirect(method meth) %{
// Leaf runtime call (no safepoint, per the comment above).  Clears the x87
// stack before the call and runs Verify_FPU_For_Leaf on return to check the
// callee left FPU state consistent.
13131 13135 match(CallLeaf);
13132 13136 effect(USE meth);
13133 13137
13134 13138 ins_cost(300);
13135 13139 format %{ "CALL_LEAF,runtime " %}
13136 13140 opcode(0xE8); /* E8 cd */
13137 13141 ins_encode( pre_call_FPU,
13138 13142 FFree_Float_Stack_All,
13139 13143 Java_To_Runtime( meth ),
13140 13144 Verify_FPU_For_Leaf, post_call_FPU );
13141 13145 ins_pipe( pipe_slow );
13142 13146 %}
13143 13147
13144 13148 instruct CallLeafNoFPDirect(method meth) %{
// Leaf runtime call that involves no floating point: unlike CallLeafDirect,
// the encoding omits the pre/post FPU bookkeeping entirely.
13145 13149 match(CallLeafNoFP);
13146 13150 effect(USE meth);
13147 13151
13148 13152 ins_cost(300);
13149 13153 format %{ "CALL_LEAF_NOFP,runtime " %}
13150 13154 opcode(0xE8); /* E8 cd */
13151 13155 ins_encode(Java_To_Runtime(meth));
13152 13156 ins_pipe( pipe_slow );
13153 13157 %}
13154 13158
13155 13159
13156 13160 // Return Instruction
13157 13161 // Remove the return address & jump to it.
13158 13162 instruct Ret() %{
// Method return: emits just the one-byte RET (0xC3).
13159 13163 match(Return);
13160 13164 format %{ "RET" %}
13161 13165 opcode(0xC3);
13162 13166 ins_encode(OpcP);
13163 13167 ins_pipe( pipe_jmp );
13164 13168 %}
13165 13169
13166 13170 // Tail Call; Jump from runtime stub to Java code.
13167 13171 // Also known as an 'interprocedural jump'.
13168 13172 // Target of jump will eventually return to caller.
13169 13173 // TailJump below removes the return address.
13170 13174 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
// Indirect jump (FF /4 = JMP r/m32) to jump_target; the method oop is pinned
// in EBX (eBXRegP).  jump_target may be any pointer register except EBP
// (eRegP_no_EBP operand class).  Return address is left on the stack.
13171 13175 match(TailCall jump_target method_oop );
13172 13176 ins_cost(300);
13173 13177 format %{ "JMP $jump_target \t# EBX holds method oop" %}
13174 13178 opcode(0xFF, 0x4); /* Opcode FF /4 */
13175 13179 ins_encode( OpcP, RegOpc(jump_target) );
13176 13180 ins_pipe( pipe_jmp );
13177 13181 %}
13178 13182
13179 13183
13180 13184 // Tail Jump; remove the return address; jump to target.
13181 13185 // TailCall above leaves the return address around.
13182 13186 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
// Tail jump: pops the return address into EDX (discarded), then does an
// indirect JMP (FF /4).  The exception oop is pinned in EAX (eAXRegP).
13183 13187 match( TailJump jump_target ex_oop );
13184 13188 ins_cost(300);
13185 13189 format %{ "POP EDX\t# pop return address into dummy\n\t"
13186 13190 "JMP $jump_target " %}
13187 13191 opcode(0xFF, 0x4); /* Opcode FF /4 */
13188 13192 ins_encode( enc_pop_rdx,
13189 13193 OpcP, RegOpc(jump_target) );
13190 13194 ins_pipe( pipe_jmp );
13191 13195 %}
13192 13196
13193 13197 // Create exception oop: created by stack-crawling runtime code.
13194 13198 // Created exception is now available to this handler, and is setup
13195 13199 // just prior to jumping to this handler. No code emitted.
13196 13200 instruct CreateException( eAXRegP ex_oop )
13197 13201 %{
// Zero-size pseudo-instruction: tells the allocator the exception oop is
// already in EAX on entry to the handler (see size(0) and empty encoding).
13198 13202 match(Set ex_oop (CreateEx));
13199 13203
13200 13204 size(0);
13201 13205 // use the following format syntax
13202 13206 format %{ "# exception oop is in EAX; no code emitted" %}
13203 13207 ins_encode();
13204 13208 ins_pipe( empty );
13205 13209 %}
13206 13210
13207 13211
13208 13212 // Rethrow exception:
13209 13213 // The exception oop will come in the first argument position.
13210 13214 // Then JUMP (not call) to the rethrow stub code.
13211 13215 instruct RethrowException()
13212 13216 %{
// Rethrow: jumps (not calls) to the rethrow stub via enc_rethrow; the
// exception oop arrives in the first argument position (see header comment).
13213 13217 match(Rethrow);
13214 13218
13215 13219 // use the following format syntax
13216 13220 format %{ "JMP rethrow_stub" %}
13217 13221 ins_encode(enc_rethrow);
13218 13222 ins_pipe( pipe_jmp );
13219 13223 %}
13220 13224
13221 13225 // inlined locking and unlocking
13222 13226
13223 13227
13224 13228 instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
// Inlined lock: sets the condition flags (cr) with the FastLock result.
// box (EBX) is consumed (USE_KILL); tmp (EAX) and scr are scratch (TEMP).
13225 13229 match( Set cr (FastLock object box) );
13226 13230 effect( TEMP tmp, TEMP scr, USE_KILL box );
13227 13231 ins_cost(300);
13228 13232 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13229 13233 ins_encode( Fast_Lock(object,box,tmp,scr) );
13230 13234 ins_pipe( pipe_slow );
13231 13235 %}
13232 13236
13233 13237 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
// Inlined unlock, mirror of cmpFastLock: flags (cr) carry the result;
// box (EAX here) is consumed (USE_KILL) and tmp is scratch.
13234 13238 match( Set cr (FastUnlock object box) );
13235 13239 effect( TEMP tmp, USE_KILL box );
13236 13240 ins_cost(300);
13237 13241 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13238 13242 ins_encode( Fast_Unlock(object,box,tmp) );
13239 13243 ins_pipe( pipe_slow );
13240 13244 %}
13241 13245
13242 13246
13243 13247
13244 13248 // ============================================================================
13245 13249 // Safepoint Instruction
13246 13250 instruct safePoint_poll(eFlagsReg cr) %{
// Safepoint poll: per the format, a 6-byte TEST against the polling page
// (size(6)).  The TEST clobbers the flags, hence KILL cr.
// NOTE(review): the poll presumably triggers via a fault on a VM-protected
// page — confirm against the Safepoint_Poll encoding class.
13247 13251 match(SafePoint);
13248 13252 effect(KILL cr);
13249 13253
13250 13254 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13251 13255 // On SPARC that might be acceptable as we can generate the address with
13252 13256 // just a sethi, saving an or. By polling at offset 0 we can end up
13253 13257 // putting additional pressure on the index-0 in the D$. Because of
13254 13258 // alignment (just like the situation at hand) the lower indices tend
13255 13259 // to see more traffic. It'd be better to change the polling address
13256 13260 // to offset 0 of the last $line in the polling page.
13257 13261
13258 13262 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13259 13263 ins_cost(125);
13260 13264 size(6) ;
13261 13265 ins_encode( Safepoint_Poll() );
13262 13266 ins_pipe( ialu_reg_mem );
13263 13267 %}
13264 13268
13265 13269
13266 13270 // ============================================================================
13267 13271 // This name is KNOWN by the ADLC and cannot be changed.
13268 13272 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13269 13273 // for this guy.
13270 13274 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
// Loads the current thread pointer into dst via MacroAssembler::get_thread().
// Flags are declared KILLed (cr), so get_thread is free to clobber them.
13271 13275 match(Set dst (ThreadLocal));
13272 13276 effect(DEF dst, KILL cr);
13273 13277
13274 13278 format %{ "MOV $dst, Thread::current()" %}
13275 13279 ins_encode %{
13276 13280 Register dstReg = as_Register($dst$$reg);
13277 13281 __ get_thread(dstReg);
13278 13282 %}
13279 13283 ins_pipe( ialu_reg_fat );
13280 13284 %}
13281 13285
13282 13286
13283 13287
13284 13288 //----------PEEPHOLE RULES-----------------------------------------------------
13285 13289 // These must follow all instruction definitions as they use the names
13286 13290 // defined in the instructions definitions.
13287 13291 //
13288 13292 // peepmatch ( root_instr_name [preceding_instruction]* );
13289 13293 //
13290 13294 // peepconstraint %{
13291 13295 // (instruction_number.operand_name relational_op instruction_number.operand_name
13292 13296 // [, ...] );
13293 13297 // // instruction numbers are zero-based using left to right order in peepmatch
13294 13298 //
13295 13299 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13296 13300 // // provide an instruction_number.operand_name for each operand that appears
13297 13301 // // in the replacement instruction's match rule
13298 13302 //
13299 13303 // ---------VM FLAGS---------------------------------------------------------
13300 13304 //
13301 13305 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13302 13306 //
13303 13307 // Each peephole rule is given an identifying number starting with zero and
13304 13308 // increasing by one in the order seen by the parser. An individual peephole
13305 13309 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13306 13310 // on the command-line.
13307 13311 //
13308 13312 // ---------CURRENT LIMITATIONS----------------------------------------------
13309 13313 //
13310 13314 // Only match adjacent instructions in same basic block
13311 13315 // Only equality constraints
13312 13316 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13313 13317 // Only one replacement instruction
13314 13318 //
13315 13319 // ---------EXAMPLE----------------------------------------------------------
13316 13320 //
13317 13321 // // pertinent parts of existing instructions in architecture description
13318 13322 // instruct movI(eRegI dst, eRegI src) %{
13319 13323 // match(Set dst (CopyI src));
13320 13324 // %}
13321 13325 //
13322 13326 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13323 13327 // match(Set dst (AddI dst src));
13324 13328 // effect(KILL cr);
13325 13329 // %}
13326 13330 //
13327 13331 // // Change (inc mov) to lea
13328 13332 // peephole %{
13329 13333 // // increment preceded by register-register move
13330 13334 // peepmatch ( incI_eReg movI );
13331 13335 // // require that the destination register of the increment
13332 13336 // // match the destination register of the move
13333 13337 // peepconstraint ( 0.dst == 1.dst );
13334 13338 // // construct a replacement instruction that sets
13335 13339 // // the destination to ( move's source register + one )
13336 13340 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13337 13341 // %}
13338 13342 //
13339 13343 // Implementation no longer uses movX instructions since
13340 13344 // machine-independent system no longer uses CopyX nodes.
13341 13345 //
13342 13346 // peephole %{
13343 13347 // peepmatch ( incI_eReg movI );
13344 13348 // peepconstraint ( 0.dst == 1.dst );
13345 13349 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13346 13350 // %}
13347 13351 //
13348 13352 // peephole %{
13349 13353 // peepmatch ( decI_eReg movI );
13350 13354 // peepconstraint ( 0.dst == 1.dst );
13351 13355 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13352 13356 // %}
13353 13357 //
13354 13358 // peephole %{
13355 13359 // peepmatch ( addI_eReg_imm movI );
13356 13360 // peepconstraint ( 0.dst == 1.dst );
13357 13361 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13358 13362 // %}
13359 13363 //
13360 13364 // peephole %{
13361 13365 // peepmatch ( addP_eReg_imm movP );
13362 13366 // peepconstraint ( 0.dst == 1.dst );
13363 13367 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13364 13368 // %}
13365 13369
13366 13370 // // Change load of spilled value to only a spill
13367 13371 // instruct storeI(memory mem, eRegI src) %{
13368 13372 // match(Set mem (StoreI mem src));
13369 13373 // %}
13370 13374 //
13371 13375 // instruct loadI(eRegI dst, memory mem) %{
13372 13376 // match(Set dst (LoadI mem));
13373 13377 // %}
13374 13378 //
13375 13379 peephole %{
// If a loadI reads back exactly the value just written by a storeI to the
// same address (constraints tie src/dst and the mem operands), replace the
// pair with the store alone, eliding the redundant load.
13376 13380 peepmatch ( loadI storeI );
13377 13381 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13378 13382 peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13379 13383 %}
13380 13384
13381 13385 //----------SMARTSPILL RULES---------------------------------------------------
13382 13386 // These must follow all instruction definitions as they use the names
13383 13387 // defined in the instructions definitions.
↓ open down ↓ |
7628 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX